Skip to content

Commit 1657e26

Browse files
committed
Simpler and more correct iteration algorithm.
The main simplification is replacing the convoluted empty-subsequence-skipping strategy of finding the last adjacent separator with a concise search for the next non-separator. Thanks, @natecook1000. The main correctness improvement is keeping track of the number of elements returned from the iterator, so a final empty subsequence can be returned if necessary in the case where the base collection ends with a separator. Added documentation describing the algorithm. Added test coverage, particularly for `omittingEmptySubsequences == false`.
1 parent 900ae90 commit 1657e26

File tree

2 files changed

+173
-48
lines changed

2 files changed

+173
-48
lines changed

Sources/Algorithms/LazySplitCollection.swift

Lines changed: 95 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,34 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
/// A collection that lazily splits a base collection into subsequences separated by elements that satisfy the given `whereSeparator` predicate.
12+
/// A collection that lazily splits a base collection into subsequences separated by elements that satisfy the
13+
/// given `whereSeparator` predicate.
1314
///
14-
/// - Note: This type is the result of `x.split(maxSplits:omittingEmptySubsequences:whereSeparator)` and
15-
/// `x.split(separator:maxSplits:omittingEmptySubsequences)`, where `x` conforms to `LazyCollection`.
16-
public struct LazySplitCollection<Base: LazyCollectionProtocol> where Base.Element: Equatable, Base.Elements.Index == Base.Index {
15+
/// - Note: This type is the result of
16+
///
17+
/// x.split(maxSplits:omittingEmptySubsequences:whereSeparator:)
18+
/// x.split(separator:maxSplits:omittingEmptySubsequences:)
19+
///
20+
/// where `x` is a `LazyCollection`.
21+
public struct LazySplitCollection<Base: LazyCollectionProtocol>
22+
where Base.Element: Equatable, Base.Elements.Index == Base.Index {
1723
internal let base: Base
18-
internal let whereSeparator: (Base.Element) -> Bool
24+
internal let isSeparator: (Base.Element) -> Bool
1925
internal let maxSplits: Int
2026
internal let omittingEmptySubsequences: Bool
2127
}
2228

2329
extension LazySplitCollection {
2430
public struct Iterator {
31+
public typealias Index = Base.Index
32+
2533
internal let base: Base
26-
internal let whereSeparator: (Base.Element) -> Bool
34+
internal let isSeparator: (Base.Element) -> Bool
2735
internal let maxSplits: Int
2836
internal let omittingEmptySubsequences: Bool
29-
internal var subSequenceStart: Base.Index
30-
internal var splitCount = 0
37+
internal var subsequenceStart: Base.Index
38+
internal var separatorCount = 0
39+
internal var sequenceLength = 0
3140

3241
internal init(
3342
base: Base,
@@ -36,10 +45,10 @@ extension LazySplitCollection {
3645
omittingEmptySubsequences: Bool
3746
) {
3847
self.base = base
39-
self.whereSeparator = whereSeparator
48+
self.isSeparator = whereSeparator
4049
self.maxSplits = maxSplits
4150
self.omittingEmptySubsequences = omittingEmptySubsequences
42-
self.subSequenceStart = self.base.startIndex
51+
self.subsequenceStart = self.base.startIndex
4352
}
4453
}
4554
}
@@ -48,53 +57,91 @@ extension LazySplitCollection.Iterator: IteratorProtocol, Sequence {
4857
public typealias Element = Base.Elements.SubSequence
4958

5059
public mutating func next() -> Element? {
51-
guard subSequenceStart < base.endIndex else { return nil }
60+
/// Separators mark the points where we want to split (cut in two) the base collection, removing
61+
/// the separator in the process.
62+
///
63+
/// Each split yields two subsequences, though splitting at the start or end of a sequence yields
64+
/// an empty subsequence where there were no elements adjacent to the cut.
65+
///
66+
/// Thus the number of subsequences returned after iterating the entire base collection
67+
/// (including empty ones, if they are not omitted) will be at most one more than the number of
68+
/// splits made (equivalently, one more than the number of separators encountered).
69+
///
70+
/// The number of splits is limited by `maxSplits`, and thus may be less than the total number of
71+
/// separators in the base collection.
72+
///
73+
/// [1, 2, 42, 3, 4, 42, 5].split(separator: 42,
74+
/// omittingEmptySubsequences: false)
75+
/// // first split -> [1, 2], [3, 4, 42, 5]
76+
/// // last split -> [1, 2], [3, 4], [5]
77+
///
78+
/// [1, 2, 42, 3, 4, 42, 5, 42].split(separator: 42,
79+
/// maxSplits: 2,
80+
/// omittingEmptySubsequences: false)
81+
/// // first split -> [1, 2], [3, 4, 42, 5, 42]
82+
/// // last split -> [1, 2], [3, 4], [5, 42]
83+
///
84+
/// [42, 1, 42].split(separator: 42, omittingEmptySubsequences: false)
85+
/// // first split -> [], [1, 42]
86+
/// // last split -> [], [1], []
87+
///
88+
/// [42, 42].split(separator: 42, omittingEmptySubsequences: false)
89+
/// // first split -> [], [42]
90+
/// // last split -> [], [], []
91+
///
92+
/// Preconditions:
93+
/// `subsequenceStart` points to the beginning of the next subsequence to return (which may
94+
/// turn out to be empty), or the end of the base collection.
5295

53-
var subSequenceEnd: Base.Index
54-
var lastAdjacentSeparator: Base.Index
96+
guard subsequenceStart < base.endIndex else {
97+
if !omittingEmptySubsequences && sequenceLength < separatorCount + 1 {
98+
/// We've reached the end of the base collection, and we're returning empty subsequences, but we
99+
/// haven't yet returned one more subsequence than the number of splits we've performed (i.e., the
100+
/// number of separators we've encountered). This happens when the last element of the base
101+
/// collection is a separator. Return one last empty subsequence.
102+
sequenceLength += 1
103+
return base.elements[subsequenceStart..<subsequenceStart]
104+
} else {
105+
return nil
106+
}
107+
}
108+
109+
/// The non-inclusive end of the next subsequence is marked by the next separator, or the end of the base collection.
110+
var subsequenceEnd: Base.Index
55111

56-
if splitCount < maxSplits {
57-
splitCount += 1
58-
subSequenceEnd = base[subSequenceStart...].firstIndex(where: whereSeparator) ?? base.endIndex
59-
lastAdjacentSeparator = subSequenceEnd
112+
/// The number of separators encountered thus far is identical to the number of splits performed thus far.
113+
if separatorCount < maxSplits {
114+
subsequenceEnd = base[subsequenceStart...].firstIndex(where: isSeparator) ?? base.endIndex
60115

61-
if omittingEmptySubsequences {
62-
/// TODO: should be able to replace this raw loop with something like
63-
/// ```
64-
/// lastAdjacentSeparator = indexBeforeFirst { !whereSeparator($0) } ?? base.endIndex
65-
/// ```
66-
/// when available.
67-
while lastAdjacentSeparator < base.endIndex {
68-
let next = base.index(after: lastAdjacentSeparator)
69-
if next < base.endIndex && whereSeparator(base[next]) {
70-
lastAdjacentSeparator = next
71-
} else {
72-
break
73-
}
116+
if omittingEmptySubsequences && base[subsequenceStart..<subsequenceEnd].isEmpty {
117+
/// Find the next sequence of non-separators.
118+
subsequenceStart = base[subsequenceEnd...].firstIndex(where: { !isSeparator($0) }) ?? base.endIndex
119+
if subsequenceStart == base.endIndex {
120+
/// No non-separators left in the base collection, so we're done.
121+
return nil
74122
}
123+
subsequenceEnd = base[subsequenceStart...].firstIndex(where: isSeparator) ?? base.endIndex
75124
}
76125
} else {
77-
subSequenceEnd = base.endIndex
78-
lastAdjacentSeparator = subSequenceEnd
126+
/// We've performed the requested number of splits. Return all remaining elements in the base collection as one final subsequence.
127+
subsequenceEnd = base.endIndex
79128
}
80129

81130
defer {
82-
if lastAdjacentSeparator < base.endIndex {
83-
subSequenceStart = base.index(after: lastAdjacentSeparator)
84-
} else {
85-
subSequenceStart = base.endIndex
86-
}
131+
separatorCount += subsequenceEnd < base.endIndex ? 1 : 0
132+
sequenceLength += 1
133+
subsequenceStart = subsequenceEnd < base.endIndex ? base.index(after: subsequenceEnd) : base.endIndex
87134
}
88135

89-
return base.elements[subSequenceStart..<subSequenceEnd]
136+
return base.elements[subsequenceStart..<subsequenceEnd]
90137
}
91138
}
92139

93140
extension LazySplitCollection: LazySequenceProtocol {
94141
public func makeIterator() -> Iterator {
95142
return Iterator(
96143
base: self.base,
97-
whereSeparator: self.whereSeparator,
144+
whereSeparator: self.isSeparator,
98145
maxSplits: self.maxSplits,
99146
omittingEmptySubsequences: self.omittingEmptySubsequences
100147
)
@@ -106,8 +153,9 @@ extension LazyCollection where Element: Equatable {
106153
/// that don't contain elements satisfying the given predicate.
107154
///
108155
/// The resulting lazy sequence consists of at most `maxSplits + 1` subsequences.
109-
/// Elements that are used to split the sequence are not returned as part of
110-
/// any subsequence.
156+
/// Elements that are used to split the collection are not returned as part of any
157+
/// subsequence (except possibly the last one, in the case where `maxSplits` is
158+
/// less than the number of separators in the collection).
111159
///
112160
/// The following examples show the effects of the `maxSplits` and
113161
/// `omittingEmptySubsequences` parameters when lazily splitting a string using a
@@ -179,13 +227,13 @@ extension LazyCollection where Element: Equatable {
179227
func split(
180228
maxSplits: Int = Int.max,
181229
omittingEmptySubsequences: Bool = true,
182-
whereSeparator: @escaping (Base.Element) -> Bool
230+
whereSeparator isSeparator: @escaping (Base.Element) -> Bool
183231
) -> LazySplitCollection<Self> {
184232
precondition(maxSplits >= 0, "Must take zero or more splits")
185233

186234
return LazySplitCollection(
187235
base: self,
188-
whereSeparator: whereSeparator,
236+
isSeparator: isSeparator,
189237
maxSplits: maxSplits,
190238
omittingEmptySubsequences: omittingEmptySubsequences
191239
)
@@ -195,8 +243,9 @@ extension LazyCollection where Element: Equatable {
195243
/// around elements equal to the given element.
196244
///
197245
/// The resulting lazy sequence consists of at most `maxSplits + 1` subsequences.
198-
/// Elements that are used to split the collection are not returned as part
199-
/// of any subsequence.
246+
/// Elements that are used to split the collection are not returned as part of any
247+
/// subsequence (except possibly the last one, in the case where `maxSplits` is
248+
/// less than the number of separators in the collection).
200249
///
201250
/// The following examples show the effects of the `maxSplits` and
202251
/// `omittingEmptySubsequences` parameters when splitting a string at each
@@ -272,7 +321,7 @@ extension LazyCollection where Element: Equatable {
272321

273322
return LazySplitCollection(
274323
base: self,
275-
whereSeparator: { $0 == separator },
324+
isSeparator: { $0 == separator },
276325
maxSplits: maxSplits,
277326
omittingEmptySubsequences: omittingEmptySubsequences
278327
)

Tests/SwiftAlgorithmsTests/LazySplitCollectionTests.swift

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,79 @@ final class LazySplitCollectionTests: XCTestCase {
2828
}
2929

3030
func testIntsWithTrailingMultipleAdjacentSeparators() {
31-
let nums = [1, 2, 42, 3, 4, 42, 42, 5, 6, 42, 7, 42, 42, 42]
31+
let nums = [1, 2, 42, 3, 4, 42, 42, 5, 6, 42, 7, 42, 42, 42,]
3232
let expectedResult = nums.split(separator: 42)
3333
let testResult = nums.lazy.split(separator: 42)
34-
for nums in testResult { print(nums.debugDescription) }
34+
XCTAssertEqualSequences(testResult, expectedResult)
35+
}
36+
37+
func testIntsAllSeparators() {
38+
let nums = [42, 42, 42, 42, 42,]
39+
let expectedResult = nums.split(separator: 42)
40+
let testResult = nums.lazy.split(separator: 42)
41+
XCTAssertEqualSequences(testResult, expectedResult)
42+
}
43+
44+
func testIntsAllSeparatorsOmittingEmptySubsequences() {
45+
let nums = [42, 42, 42, 42, 42,]
46+
let expectedResult = nums.split(separator: 42, omittingEmptySubsequences: false)
47+
let testResult = nums.lazy.split(separator: 42, omittingEmptySubsequences: false)
48+
XCTAssertEqualSequences(testResult, expectedResult)
49+
}
50+
51+
func testIntsStartWithSeparator() {
52+
let nums = [42, 1, 2, 42, 3, 4, 42, 5, 6, 42, 7,]
53+
let expectedResult = nums.split(separator: 42)
54+
let testResult = nums.lazy.split(separator: 42)
55+
XCTAssertEqualSequences(testResult, expectedResult)
56+
}
57+
58+
func testIntsStartWithSeparatorOmittingEmptySubsequences() {
59+
let nums = [42, 1, 2, 42, 3, 4, 42, 5, 6, 42, 7,]
60+
let expectedResult = nums.split(separator: 42, omittingEmptySubsequences: false)
61+
let testResult = nums.lazy.split(separator: 42, omittingEmptySubsequences: false)
62+
XCTAssertEqualSequences(testResult, expectedResult)
63+
}
64+
65+
func testIntsStartWithSeparatorMaxSplitsOmittingEmptySubsequences() {
66+
let nums = [42, 1, 2, 42, 3, 4, 42, 5, 6, 42, 7,]
67+
let expectedResult = nums.split(separator: 42, maxSplits: 2, omittingEmptySubsequences: false)
68+
let testResult = nums.lazy.split(separator: 42, maxSplits: 2, omittingEmptySubsequences: false)
69+
XCTAssertEqualSequences(testResult, expectedResult)
70+
}
71+
72+
func testSingleElement() {
73+
let num = [1]
74+
let expectedResult = num.split(separator: 42)
75+
let testResult = num.lazy.split(separator: 42)
76+
XCTAssertEqualSequences(testResult, expectedResult)
77+
}
78+
79+
func testSingleSeparator() {
80+
let num = [42]
81+
let expectedResult = num.split(separator: 42)
82+
let testResult = num.lazy.split(separator: 42)
83+
XCTAssertEqualSequences(testResult, expectedResult)
84+
}
85+
86+
func testSingleSeparatorOmittingEmptySubsequences() {
87+
let num = [42]
88+
let expectedResult = num.split(separator: 42, omittingEmptySubsequences: false)
89+
let testResult = num.lazy.split(separator: 42, omittingEmptySubsequences: false)
90+
XCTAssertEqualSequences(testResult, expectedResult)
91+
}
92+
93+
func testNonSeparatorSandwich() {
94+
let nums = [42, 1, 42,]
95+
let expectedResult = nums.split(separator: 42)
96+
let testResult = nums.lazy.split(separator: 42)
97+
XCTAssertEqualSequences(testResult, expectedResult)
98+
}
99+
100+
func testNonSeparatorSandwichOmittingEmptySubsequences() {
101+
let nums = [42, 1, 42,]
102+
let expectedResult = nums.split(separator: 42, omittingEmptySubsequences: false)
103+
let testResult = nums.lazy.split(separator: 42, omittingEmptySubsequences: false)
35104
XCTAssertEqualSequences(testResult, expectedResult)
36105
}
37106

@@ -97,4 +166,11 @@ final class LazySplitCollectionTests: XCTestCase {
97166
let testSubject = "foo.bar".lazy.split(separator: ".")
98167
XCTAssertLazySequence(testSubject)
99168
}
169+
170+
func testEmptyEquatableCollection() {
171+
let empty: [Int] = []
172+
let expectedResult = empty.split(separator: 42)
173+
let testResult = empty.lazy.split(separator: 42)
174+
XCTAssertEqualSequences(expectedResult, testResult)
175+
}
100176
}

0 commit comments

Comments
 (0)