Skip to content

Commit db48bc9

Browse files
committed
Make LazySplitCollection a lazy collection.
Accomplished by defining its index to wrap a range and moving the iteration logic into `index(after:)`.
1 parent 3fb8bfa commit db48bc9

File tree

2 files changed

+157
-131
lines changed

2 files changed

+157
-131
lines changed

Sources/Algorithms/LazySplitCollection.swift

Lines changed: 140 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -23,153 +23,162 @@ public struct LazySplitCollection<Base: Collection> {
2323
internal let isSeparator: (Base.Element) -> Bool
2424
internal let maxSplits: Int
2525
internal let omittingEmptySubsequences: Bool
26-
}
27-
28-
extension LazySplitCollection {
29-
public struct Iterator {
30-
public typealias Index = Base.Index
26+
internal var _startIndex: Index
3127

32-
internal let base: Base
33-
internal let isSeparator: (Base.Element) -> Bool
34-
internal let maxSplits: Int
35-
internal let omittingEmptySubsequences: Bool
36-
internal var subsequenceStart: Base.Index
37-
internal var separatorCount = 0
38-
internal var sequenceLength = 0
28+
internal init(
29+
base: Base,
30+
isSeparator: @escaping (Base.Element) -> Bool,
31+
maxSplits: Int,
32+
omittingEmptySubsequences: Bool
33+
) {
34+
self.base = base
35+
self.isSeparator = isSeparator
36+
self.maxSplits = maxSplits
37+
self.omittingEmptySubsequences = omittingEmptySubsequences
38+
self._startIndex = Index(baseRange: base.startIndex..<base.startIndex)
3939

40-
internal init(
41-
base: Base,
42-
whereSeparator: @escaping (Base.Element) -> Bool,
43-
maxSplits: Int,
44-
omittingEmptySubsequences: Bool
45-
) {
46-
self.base = base
47-
self.isSeparator = whereSeparator
48-
self.maxSplits = maxSplits
49-
self.omittingEmptySubsequences = omittingEmptySubsequences
50-
self.subsequenceStart = self.base.startIndex
40+
if !base.isEmpty {
41+
// Precompute the start index.
42+
_startIndex = indexForSubsequence(atOrAfter: base.startIndex)
5143
}
5244
}
5345
}
5446

55-
extension LazySplitCollection.Iterator: IteratorProtocol {
56-
public typealias Element = Base.SubSequence
57-
58-
public mutating func next() -> Element? {
59-
/// Separators mark the points where we want to split (cut in two) the base
60-
/// collection, removing the separator in the process.
61-
///
62-
/// Each split yields two subsequences, though splitting at the start or end
63-
/// of a sequence yields an empty subsequence where there were no elements
64-
/// adjacent to the cut.
65-
///
66-
/// Thus the maximum number of subsequences returned after iterating the
67-
/// entire base collection (including empty ones, if they are not omitted)
68-
/// will be at most one more than the number of splits made (equivalently,
69-
/// one more than the number of separators encountered).
70-
///
71-
/// The number of splits is limited by `maxSplits`, and thus may be less
72-
/// than the total number of separators in the base collection.
73-
///
74-
/// [1, 2, 42, 3, 4, 42, 5].split(separator: 42,
75-
/// omittingEmptySubsequences: false)
76-
/// // first split -> [1, 2], [3, 4, 42, 5]
77-
/// // last split -> [1, 2], [3, 4], [5]
78-
///
79-
/// [1, 2, 42, 3, 4, 42, 5, 42].split(separator: 42,
80-
/// maxSplits: 2,
81-
/// omittingEmptySubsequences: false)
82-
/// // first split -> [1, 2], [3, 4, 42, 5, 42]
83-
/// // last split -> [1, 2], [3, 4], [5, 42]
84-
///
85-
/// [42, 1, 42].split(separator: 42, omittingEmptySubsequences: false)
86-
/// // first split -> [], [1, 42]
87-
/// // last split -> [], [1], []
88-
///
89-
/// [42, 42].split(separator: 42, omittingEmptySubsequences: false)
90-
/// // first split -> [], [42]
91-
/// // last split -> [], [], []
92-
///
93-
/// Preconditions:
94-
/// `subsequenceStart` points to the beginning of the next subsequence to
95-
/// return (which may turn out to be empty), or the end of the base
47+
extension LazySplitCollection: LazyCollectionProtocol {
48+
/// Position of a subsequence in a split collection.
49+
public struct Index: Comparable {
50+
/// The range corresponding to the subsequence at this position.
51+
internal let baseRange: Range<Base.Index>
52+
/// The number of subsequences up to and including this position in the
9653
/// collection.
54+
internal let sequenceLength: Int
55+
internal let separatorCount: Int
9756

98-
guard subsequenceStart < base.endIndex else {
99-
if !omittingEmptySubsequences && sequenceLength < separatorCount + 1 {
100-
// We've reached the end of the base collection, and we're returning
101-
// empty subsequences, but we haven't yet returned one more subsequence
102-
// than the number of splits we've performed (i.e., the number of
103-
// separators we've encountered). This happens when the last element of
104-
// the base collection is a separator. Return one last empty
105-
// subsequence.
106-
sequenceLength += 1
107-
return base[subsequenceStart..<subsequenceStart]
108-
} else {
109-
return nil
110-
}
57+
internal init(
58+
baseRange: Range<Base.Index>,
59+
sequenceLength: Int = 0,
60+
separatorCount: Int = 0
61+
) {
62+
self.baseRange = baseRange
63+
self.sequenceLength = sequenceLength
64+
self.separatorCount = separatorCount
65+
}
66+
67+
public static func == (lhs: Index, rhs: Index) -> Bool {
68+
// Each index represents the range of a distinct subsequence, so comparing
69+
// lower bounds suffices, except that a trailing empty subsequence may repeat one.
70+
lhs.baseRange.lowerBound == rhs.baseRange.lowerBound
71+
}
72+
73+
public static func < (lhs: Index, rhs: Index) -> Bool {
74+
// Only use the lower bound to test for ordering, as above.
75+
lhs.baseRange.lowerBound < rhs.baseRange.lowerBound
11176
}
77+
}
11278

113-
// The non-inclusive end of the next subsequence is marked by the next
114-
// separator, or the end of the base collection.
115-
var subsequenceEnd: Base.Index
79+
/// Returns the index of the subsequence starting at or after the given base collection index.
80+
internal func indexForSubsequence(
81+
atOrAfter lowerBound: Base.Index,
82+
sequenceLength: Int = 0,
83+
separatorCount: Int = 0
84+
) -> Index {
85+
var newSeparatorCount = separatorCount
86+
var start = lowerBound
87+
// If we don't have any more splits to do (which we'll determine shortly),
88+
// the end of the next subsequence will be the end of the base collection.
89+
var end = base.endIndex
11690

11791
// The number of separators encountered thus far is identical to the number
11892
// of splits performed thus far.
119-
if separatorCount < maxSplits {
120-
subsequenceEnd =
121-
base[subsequenceStart...].firstIndex(where: isSeparator)
93+
if newSeparatorCount < maxSplits {
94+
// The non-inclusive end of the next subsequence is marked by the next
95+
// separator, or the end of the base collection.
96+
end =
97+
base[start...].firstIndex(where: isSeparator)
12298
?? base.endIndex
12399

124-
if omittingEmptySubsequences
125-
&& base[subsequenceStart..<subsequenceEnd].isEmpty
126-
{
127-
// Find the next sequence of non-separators.
128-
subsequenceStart =
129-
base[subsequenceEnd...].firstIndex(where: { !isSeparator($0) })
130-
?? base.endIndex
131-
if subsequenceStart == base.endIndex {
132-
// No non-separators left in the base collection, so we're done.
133-
return nil
100+
if base[start..<end].isEmpty {
101+
if omittingEmptySubsequences {
102+
// Find the next subsequence of non-separators.
103+
start =
104+
base[end...].firstIndex(where: { !isSeparator($0) })
105+
?? base.endIndex
106+
if start == base.endIndex {
107+
// No non-separators left in the base collection. We're done.
108+
return endIndex
109+
}
110+
end = base[start...].firstIndex(where: isSeparator) ?? base.endIndex
134111
}
135-
subsequenceEnd =
136-
base[subsequenceStart...].firstIndex(where: isSeparator)
137-
?? base.endIndex
138112
}
139-
} else {
140-
// We've performed the requested number of splits. Return all remaining
141-
// elements in the base collection as one final subsequence.
142-
subsequenceEnd = base.endIndex
143113
}
144114

145-
defer {
146-
separatorCount += subsequenceEnd < base.endIndex ? 1 : 0
147-
sequenceLength += 1
148-
subsequenceStart =
149-
subsequenceEnd < base.endIndex
150-
? base.index(after: subsequenceEnd) : base.endIndex
115+
if end < base.endIndex {
116+
newSeparatorCount += 1
151117
}
152118

153-
return base[subsequenceStart..<subsequenceEnd]
119+
return Index(
120+
baseRange: start..<end,
121+
sequenceLength: sequenceLength + 1,
122+
separatorCount: newSeparatorCount
123+
)
124+
}
125+
126+
public var startIndex: Index {
127+
_startIndex
154128
}
155-
}
156129

157-
extension LazySplitCollection: LazySequenceProtocol {
158-
public func makeIterator() -> Iterator {
159-
return Iterator(
160-
base: self.base,
161-
whereSeparator: self.isSeparator,
162-
maxSplits: self.maxSplits,
163-
omittingEmptySubsequences: self.omittingEmptySubsequences
130+
public var endIndex: Index {
131+
Index(baseRange: base.endIndex..<base.endIndex)
132+
}
133+
134+
public func index(after i: Index) -> Index {
135+
precondition(i != endIndex, "Can't advance past endIndex")
136+
137+
var subsequenceStart = i.baseRange.upperBound
138+
if subsequenceStart < base.endIndex {
139+
// If we're not already at the end of the base collection, the previous
140+
// subsequence ended with a separator. Start searching for the next
141+
// subsequence at the following element.
142+
subsequenceStart = base.index(after: i.baseRange.upperBound)
143+
}
144+
145+
guard subsequenceStart != base.endIndex else {
146+
if !omittingEmptySubsequences
147+
&& i.sequenceLength < i.separatorCount + 1
148+
{
149+
/// The base collection ended with a separator, so we need to emit one
150+
/// more empty subsequence. Its range can't be equal to that of
151+
/// `endIndex`, else we'll terminate iteration prematurely.
152+
return Index(
153+
baseRange: i.baseRange.upperBound..<i.baseRange.upperBound,
154+
sequenceLength: i.sequenceLength + 1,
155+
separatorCount: i.separatorCount
156+
)
157+
} else {
158+
return endIndex
159+
}
160+
}
161+
162+
return indexForSubsequence(
163+
atOrAfter: subsequenceStart,
164+
sequenceLength: i.sequenceLength,
165+
separatorCount: i.separatorCount
164166
)
165167
}
168+
169+
public subscript(position: Index) -> Base.SubSequence {
170+
precondition(position != endIndex, "Can't subscript using endIndex")
171+
return base[position.baseRange]
172+
}
166173
}
167174

175+
extension LazySplitCollection.Index: Hashable where Base.Index: Hashable {}
176+
168177
extension LazyCollectionProtocol {
169178
/// Lazily returns the longest possible subsequences of the collection, in order,
170179
/// that don't contain elements satisfying the given predicate.
171180
///
172-
/// The resulting lazy sequence consists of at most `maxSplits + 1` subsequences.
181+
/// The resulting lazy collection consists of at most `maxSplits + 1` subsequences.
173182
/// Elements that are used to split the collection are not returned as part of any
174183
/// subsequence (except possibly the last one, in the case where `maxSplits` is
175184
/// less than the number of separators in the collection).
@@ -196,7 +205,10 @@ extension LazyCollectionProtocol {
196205
/// The second example passes `1` for the `maxSplits` parameter, so the
197206
/// original string is split just once, into two new strings.
198207
///
199-
/// for spaceless in line.lazy.split(maxSplits: 1, whereSeparator: { $0 == " " }) {
208+
/// for spaceless in line.lazy.split(
209+
/// maxSplits: 1,
210+
/// whereSeparator: { $0 == " " }
211+
/// ) {
200212
/// print(spaceless)
201213
/// }
202214
/// // Prints
@@ -207,7 +219,10 @@ extension LazyCollectionProtocol {
207219
/// parameter, so the returned array contains empty strings where spaces
208220
/// were repeated.
209221
///
210-
/// for spaceless in line.lazy.split(omittingEmptySubsequences: false, whereSeparator: { $0 == " " }) {
222+
/// for spaceless in line.lazy.split(
223+
/// omittingEmptySubsequences: false,
224+
/// whereSeparator: { $0 == " " }
225+
/// ) {
211226
/// print(spaceless)
212227
/// }
213228
/// // Prints
@@ -237,7 +252,7 @@ extension LazyCollectionProtocol {
237252
/// - whereSeparator: A closure that takes an element as an argument and
238253
/// returns a Boolean value indicating whether the collection should be
239254
/// split at that element.
240-
/// - Returns: A lazy sequence of subsequences, split from this collection's
255+
/// - Returns: A lazy collection of subsequences, split from this collection's
241256
/// elements.
242257
///
243258
/// - Complexity: O(*n*), where *n* is the length of the collection.
@@ -262,7 +277,7 @@ where Element: Equatable {
262277
/// Lazily returns the longest possible subsequences of the collection, in order,
263278
/// around elements equal to the given element.
264279
///
265-
/// The resulting lazy sequence consists of at most `maxSplits + 1` subsequences.
280+
/// The resulting lazy collection consists of at most `maxSplits + 1` subsequences.
266281
/// Elements that are used to split the collection are not returned as part of any
267282
/// subsequence (except possibly the last one, in the case where `maxSplits` is
268283
/// less than the number of separators in the collection).
@@ -300,7 +315,10 @@ where Element: Equatable {
300315
/// parameter, so the returned array contains empty strings where spaces
301316
/// were repeated.
302317
///
303-
/// for spaceless in line.lazy.split(separator: " ", omittingEmptySubsequences: false) {
318+
/// for spaceless in line.lazy.split(
319+
/// separator: " ",
320+
/// omittingEmptySubsequences: false
321+
/// ) {
304322
/// print(spaceless)
305323
/// }
306324
/// // Prints
@@ -328,7 +346,7 @@ where Element: Equatable {
328346
/// elements in the collection and for each instance of `separator` at
329347
/// the start or end of the collection. If `true`, only nonempty
330348
/// subsequences are returned. The default value is `true`.
331-
/// - Returns: A lazy sequence of subsequences, split from this collection's
349+
/// - Returns: A lazy collection of subsequences split from this collection's
332350
/// elements.
333351
///
334352
/// - Complexity: O(*n*), where *n* is the length of the collection.

0 commit comments

Comments
 (0)