Add partitioned(_:)

mdznr · mdznr · commit 25cf83dbb9b1 · 2021-07-15T12:24:18.000-07:00
`partitioned(_:)` works like `filter(_:)`, but also returns the excluded elements by returning a tuple of two `Array`s
diff --git a/Guides/Partition.md b/Guides/Partition.md
@@ -42,6 +42,32 @@ let p = numbers.partitioningIndex(where: { $0.isMultiple(of: 20) })
 // numbers[p...] = [20, 40, 60]
 ```
 
+The standard library’s existing `filter(_:)` method returns only the elements
+that match a given predicate. `partitioned(_:)` returns both groups as a tuple
+of two arrays: first the elements that don’t match the predicate, then the
+elements that do.
+
+```swift
+let cast = ["Vivien", "Marlon", "Kim", "Karl"]
+let (longNames, shortNames) = cast.partitioned({ $0.count < 5 })
+print(longNames)
+// Prints "["Vivien", "Marlon"]"
+print(shortNames)
+// Prints "["Kim", "Karl"]"
+```
+
+There’s also a function to split a collection into a prefix and a suffix, where
+the prefix runs up to but does not include a given index:
+
+```swift
+let cast = ["Vivien", "Marlon", "Kim", "Karl"]
+let (callbacks, alternates) = cast.partitioned(upTo: 2)
+print(callbacks)
+// Prints "["Vivien", "Marlon"]"
+print(alternates)
+// Prints "["Kim", "Karl"]"
+```
+
 ## Detailed Design
 
 All mutating methods are declared as extensions to `MutableCollection`.
@@ -69,11 +95,17 @@ extension Collection {
         where belongsInSecondPartition: (Element) throws -> Bool
     ) rethrows -> Index
 }
+
+extension Sequence {
+    public func partitioned(
+        _ belongsInSecondCollection: (Element) throws -> Bool
+    ) rethrows -> ([Element], [Element])
+}
 ```
 
 ### Complexity
 
-The existing partition is an O(_n_) operations, where _n_ is the length of the
+The existing partition is an O(_n_) operation, where _n_ is the length of the
 range to be partitioned, while the stable partition is O(_n_ log _n_). Both
 partitions have algorithms with improved performance for bidirectional
 collections, so it would be ideal for those to be customization points were they
@@ -82,6 +114,9 @@ to eventually land in the standard library.
 `partitioningIndex(where:)` is a slight generalization of a binary search, and
 is an O(log _n_) operation for random-access collections; O(_n_) otherwise.
 
+`partitioned(_:)` is an O(_n_) operation, where _n_ is the number of elements in
+the original sequence.
+
 ### Comparison with other languages
 
 **C++:** The `<algorithm>` library defines `partition`, `stable_partition`, and
diff --git a/README.md b/README.md
@@ -27,6 +27,7 @@ Read more about the package, and the intent behind it, in the [announcement on s
 #### Subsetting operations
 
 - [`compacted()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Compacted.md): Drops the `nil`s from a sequence or collection, unwrapping the remaining elements.
+- [`partitioned(_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Partition.md): Returns the elements in a sequence or collection that do and do not match a given predicate.
 - [`randomSample(count:)`, `randomSample(count:using:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/RandomSampling.md): Randomly selects a specific number of elements from a collection.
 - [`randomStableSample(count:)`, `randomStableSample(count:using:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/RandomSampling.md): Randomly selects a specific number of elements from a collection, preserving their original relative order.
 - [`striding(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Stride.md): Returns every nth element of a collection.
diff --git a/Sources/Algorithms/Partition.swift b/Sources/Algorithms/Partition.swift
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift Algorithms open source project
 //
-// Copyright (c) 2020 Apple Inc. and the Swift project authors
+// Copyright (c) 2021 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -204,3 +204,156 @@ extension Collection {
   }
 }
 
+//===----------------------------------------------------------------------===//
+// partitioned(_:)
+//===----------------------------------------------------------------------===//
+
+extension Sequence {
+  /// Returns two arrays containing, in order, the elements of the sequence that
+  /// don’t and do satisfy the given predicate, respectively.
+  ///
+  /// In this example, `partitioned(_:)` separates the names that are at least
+  /// five characters long from the names that are shorter than five characters:
+  ///
+  ///     let cast = ["Vivien", "Marlon", "Kim", "Karl"]
+  ///     let (longNames, shortNames) = cast.partitioned({ $0.count < 5 })
+  ///     print(longNames)
+  ///     // Prints "["Vivien", "Marlon"]"
+  ///     print(shortNames)
+  ///     // Prints "["Kim", "Karl"]"
+  ///
+  /// - Parameter belongsInSecondCollection: A closure that takes an element of
+  /// the sequence as its argument and returns a Boolean value indicating
+  /// whether the element should be included in the second returned array.
+  /// Otherwise, the element will appear in the first returned array.
+  ///
+  /// - Returns: Two arrays that together hold every element of the receiver.
+  /// The first array contains the elements for which
+  /// `belongsInSecondCollection` returned `false`, and the second array
+  /// contains the elements for which it returned `true`.
+  ///
+  /// - Complexity: O(*n*), where *n* is the length of the sequence.
+  ///
+  /// - Note: The length of a sequence isn’t known up front, so both arrays
+  /// grow by appending. The `Collection` overload of this method instead uses
+  /// `count` to allocate its storage once.
+  @inlinable
+  public func partitioned(
+	_ belongsInSecondCollection: (Element) throws -> Bool
+  ) rethrows -> ([Element], [Element]) {
+	var lhs = ContiguousArray<Element>()
+	var rhs = ContiguousArray<Element>()
+	
+	for element in self {
+	  if try belongsInSecondCollection(element) {
+		rhs.append(element)
+	  } else {
+		lhs.append(element)
+	  }
+	}
+	
+	return _tupleMap((lhs, rhs), { Array($0) })
+  }
+}
+
+extension Collection {
+  // This is a specialized version of the same algorithm on `Sequence` that
+  // avoids reallocation of arrays since `count` is known ahead of time.
+  @inlinable
+  public func partitioned(
+	_ belongsInSecondCollection: (Element) throws -> Bool
+  ) rethrows -> ([Element], [Element]) {
+	guard !self.isEmpty else {
+	  return ([], [])
+	}
+	
+	// A collection can report its `count` before being iterated (in O(1) only
+	// for random-access collections), so we allocate one buffer of that size
+	// and fill it from both ends: first-array elements from the front,
+	// second-array elements from the back. No dynamic resizing is needed. The
+	// back-filled elements end up in reverse order, so they are reversed in
+	// place afterwards. If the predicate throws, the elements initialized so
+	// far at both ends are deinitialized before the error is rethrown.
+	let count = self.count
+	
+	// Inside of the `initializingWith` closure, we record the actual mid-point:
+	// the offset in `elements` where the second array’s elements begin.
+	var midPoint: Int = 0
+	
+	let elements = try [Element](
+	  unsafeUninitializedCapacity: count,
+	  initializingWith: { buffer, initializedCount in
+		var lhs = buffer.baseAddress!
+		var rhs = lhs + buffer.count
+		do {
+		  for element in self {
+			if try belongsInSecondCollection(element) {
+			  rhs -= 1
+			  rhs.initialize(to: element)
+			} else {
+			  lhs.initialize(to: element)
+			  lhs += 1
+			}
+		  }
+		  // Restore the original order of the back-filled (second-array) elements.
+		  let rhsIndex = rhs - buffer.baseAddress!
+		  buffer[rhsIndex...].reverse()
+		  initializedCount = buffer.count
+		  
+		  midPoint = rhsIndex
+		} catch {
+		  let lhsCount = lhs - buffer.baseAddress!
+		  let rhsCount = (buffer.baseAddress! + buffer.count) - rhs
+		  buffer.baseAddress!.deinitialize(count: lhsCount)
+		  rhs.deinitialize(count: rhsCount)
+		  throw error
+		}
+	  })
+	
+	let collections = elements.partitioned(upTo: midPoint)
+	return _tupleMap(collections, { Array($0) })
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// partitioned(upTo:)
+//===----------------------------------------------------------------------===//
+
+extension Collection {
+  /// Splits the receiving collection into two subsequences at the given index.
+  /// - Parameter index: The position at which to split the collection.
+  /// - Returns: A tuple of two subsequences: the elements in
+  /// `startIndex..<index`, followed by the elements in `index..<endIndex`.
+  /// - Note: The first subsequence in the returned tuple does *not* include
+  /// the element at `index`. That element is in the second subsequence.
+  /// - Complexity: O(1)
+  @inlinable
+  public func partitioned(upTo index: Index) -> (SubSequence, SubSequence) {
+	return (
+	  self[self.startIndex..<index],
+	  self[index..<self.endIndex]
+	)
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// _tupleMap(_:_:)
+//===----------------------------------------------------------------------===//
+
+/// Returns a tuple containing the results of mapping the given closure over
+/// each of the tuple’s elements.
+/// - Parameters:
+///   - x: The two-element tuple to transform.
+///   - transform: A mapping closure. `transform` accepts an element of the
+///   tuple as its parameter and returns a transformed value.
+/// - Returns: A tuple containing the transformed elements of `x`, in order.
+@usableFromInline
+internal func _tupleMap<T, U>(
+  _ x: (T, T),
+  _ transform: (T) throws -> U
+) rethrows -> (U, U) {
+  return (
+	try transform(x.0),
+	try transform(x.1)
+  )
+}
diff --git a/Tests/SwiftAlgorithmsTests/PartitionTests.swift b/Tests/SwiftAlgorithmsTests/PartitionTests.swift
@@ -133,4 +133,63 @@ final class PartitionTests: XCTestCase {
       }
     }
   }
+  
+  func testPartitionedWithEmptyInput() {
+    let input: [Int] = []
+    // An empty input must yield two empty arrays, whatever the predicate says.
+    let s0 = input.partitioned({ _ in return true })
+    
+    XCTAssertTrue(s0.0.isEmpty)
+    XCTAssertTrue(s0.1.isEmpty)
+  }
+  
+  /// Tests the `partitioned(_:)` documentation example: names that satisfy the predicate (shorter than five characters) land in the second array.
+  func testPartitionedExample() throws {
+    let cast = ["Vivien", "Marlon", "Kim", "Karl"]
+    let (longNames, shortNames) = cast.partitioned({ $0.count < 5 })
+    XCTAssertEqual(longNames, ["Vivien", "Marlon"])
+    XCTAssertEqual(shortNames, ["Kim", "Karl"])
+  }
+  
+  func testPartitionedWithPredicate() throws {
+    let s0 = ["A", "B", "C", "D"].partitioned({ $0 == $0.lowercased() })
+    let s1 = ["a", "B", "C", "D"].partitioned({ $0 == $0.lowercased() })
+    let s2 = ["a", "B", "c", "D"].partitioned({ $0 == $0.lowercased() })
+    let s3 = ["a", "B", "c", "d"].partitioned({ $0 == $0.lowercased() })
+    // `.0` holds the elements that failed the predicate (uppercase) and `.1` those that passed (lowercase), each in original order.
+    XCTAssertEqual(s0.0, ["A", "B", "C", "D"])
+    XCTAssertEqual(s0.1, [])
+    
+    XCTAssertEqual(s1.0, ["B", "C", "D"])
+    XCTAssertEqual(s1.1, ["a"])
+    
+    XCTAssertEqual(s2.0, ["B", "D"])
+    XCTAssertEqual(s2.1, ["a", "c"])
+    
+    XCTAssertEqual(s3.0, ["B"])
+    XCTAssertEqual(s3.1, ["a", "c", "d"])
+  }
+  
+  func testPartitionedUpToIndex() throws {
+    let s0 = ["A", "B", "C", "D"].partitioned(upTo: 0)
+    let s1 = ["A", "B", "C", "D"].partitioned(upTo: 1)
+    let s2 = ["A", "B", "C", "D"].partitioned(upTo: 2)
+    let s3 = ["A", "B", "C", "D"].partitioned(upTo: 3)
+    let s4 = ["A", "B", "C", "D"].partitioned(upTo: 4)
+    // `.0` is the prefix before the index and `.1` starts with the element at the index; covers every split point from `startIndex` to `endIndex`.
+    XCTAssertEqual(s0.0, [])
+    XCTAssertEqual(s0.1, ["A", "B", "C", "D"])
+    
+    XCTAssertEqual(s1.0, ["A"])
+    XCTAssertEqual(s1.1, ["B", "C", "D"])
+    
+    XCTAssertEqual(s2.0, ["A", "B"])
+    XCTAssertEqual(s2.1, ["C", "D"])
+    
+    XCTAssertEqual(s3.0, ["A", "B", "C"])
+    XCTAssertEqual(s3.1, ["D"])
+    
+    XCTAssertEqual(s4.0, ["A", "B", "C", "D"])
+    XCTAssertEqual(s4.1, [])
+  }
 }