Skip to content

Commit 2819c44

Browse files
committed
Add LazySplitSequence.
Per @natecook1000's request, a lazy sequence splitter that vends arrays.
1 parent 39f9128 commit 2819c44

File tree

2 files changed

+406
-0
lines changed

2 files changed

+406
-0
lines changed
Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift Algorithms open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
/// A sequence that lazily splits a base sequence into subsequences separated by
/// elements that satisfy the given `whereSeparator` predicate.
///
/// Each subsequence is vended as an array of the base sequence's elements.
///
/// - Note: This type is the result of
///
///       x.split(maxSplits:omittingEmptySubsequences:whereSeparator:)
///       x.split(separator:maxSplits:omittingEmptySubsequences:)
///
///   where `x` conforms to `LazySequenceProtocol`.
public struct LazySplitSequence<Base: Sequence> {
  /// The sequence whose elements are split.
  internal let base: Base

  /// Returns `true` for elements at which the base sequence should be split.
  internal let isSeparator: (Base.Element) -> Bool

  /// The maximum number of times the base sequence may be split.
  internal let maxSplits: Int

  /// Whether empty subsequences are left out of the result.
  internal let omittingEmptySubsequences: Bool

  /// Creates a lazy split view over `base`.
  ///
  /// The labels and their order mirror the stored properties, so call sites
  /// read exactly as they would with a synthesized memberwise initializer.
  internal init(
    base: Base,
    isSeparator: @escaping (Base.Element) -> Bool,
    maxSplits: Int,
    omittingEmptySubsequences: Bool
  ) {
    self.base = base
    self.isSeparator = isSeparator
    self.maxSplits = maxSplits
    self.omittingEmptySubsequences = omittingEmptySubsequences
  }
}
27+
28+
extension LazySplitSequence {
  /// The iterator of a `LazySplitSequence`.
  ///
  /// It walks the base sequence's iterator and vends each subsequence as an
  /// array of base elements.
  public struct Iterator {
    /// A subsequence, materialized as an array of the base's elements.
    public typealias Element = [Base.Element]

    /// The iterator over the base sequence, advanced by `next()`.
    internal var base: Base.Iterator

    /// Returns `true` for elements at which the base sequence should be split.
    internal let isSeparator: (Base.Element) -> Bool

    /// The maximum number of times the base sequence may be split.
    internal let maxSplits: Int

    /// Whether empty subsequences are left out of the iteration.
    internal let omittingEmptySubsequences: Bool

    /// The buffer in which `next()` accumulates the subsequence being built.
    internal var subsequence: Element

    /// A counter maintained by `next()` and compared against `maxSplits` to
    /// decide whether further splitting is allowed.
    internal var separatorCount: Int

    /// A counter maintained by `next()` that tracks the subsequences vended.
    internal var sequenceLength: Int

    /// Creates an iterator in its initial state: an empty buffer and both
    /// counters at zero.
    internal init(
      base iterator: Base.Iterator,
      whereSeparator predicate: @escaping (Base.Element) -> Bool,
      maxSplits limit: Int,
      omittingEmptySubsequences omitsEmpty: Bool
    ) {
      base = iterator
      isSeparator = predicate
      maxSplits = limit
      omittingEmptySubsequences = omitsEmpty
      subsequence = []
      separatorCount = 0
      sequenceLength = 0
    }
  }
}
53+
54+
extension LazySplitSequence.Iterator: IteratorProtocol {
  /// Returns the next subsequence of the base sequence, or `nil` once every
  /// subsequence has been vended.
  ///
  /// Separators mark the points where we want to split (cut in two) the base
  /// sequence, removing the separator in the process.
  ///
  /// Each split yields two subsequences, though splitting at the start or end
  /// of a sequence yields an empty subsequence where there were no elements
  /// adjacent to the cut.
  ///
  /// Thus the number of subsequences returned after iterating the entire base
  /// sequence (including empty ones, if they are not omitted) will be at most
  /// one more than the number of splits made.
  ///
  /// The number of splits is limited by `maxSplits`, and thus may be less
  /// than the total number of separators in the base sequence.
  ///
  ///     [1, 2, 42, 3, 4, 42, 5].split(separator: 42,
  ///                                   omittingEmptySubsequences: false)
  ///     // first split -> [1, 2], [3, 4, 42, 5]
  ///     // last split  -> [1, 2], [3, 4], [5]
  ///
  ///     [1, 2, 42, 3, 4, 42, 5, 42].split(separator: 42,
  ///                                       maxSplits: 2,
  ///                                       omittingEmptySubsequences: false)
  ///     // first split -> [1, 2], [3, 4, 42, 5, 42]
  ///     // last split  -> [1, 2], [3, 4], [5, 42]
  ///
  ///     [42, 1, 42].split(separator: 42, omittingEmptySubsequences: false)
  ///     // first split -> [], [1, 42]
  ///     // last split  -> [], [1], []
  ///
  ///     [42, 42].split(separator: 42, omittingEmptySubsequences: false)
  ///     // first split -> [], [42]
  ///     // last split  -> [], [], []
  ///
  /// When empty subsequences are omitted, a separator that would only produce
  /// an empty subsequence is dropped without using up one of the `maxSplits`
  /// splits, matching the eager `split` of the standard library.
  ///
  ///     [42, 1, 42, 2, 42, 3].split(separator: 42, maxSplits: 1)
  ///     // -> [1], [2, 42, 3]
  ///
  /// - Returns: The next subsequence as an array, or `nil` when the iteration
  ///   is finished.
  public mutating func next() -> Element? {
    // `separatorCount` counts the splits actually made and `sequenceLength`
    // the subsequences vended. Every split vends exactly one subsequence, so
    // the two only differ once the trailing subsequence has been vended, at
    // which point nothing is left to return.
    guard sequenceLength == separatorCount else { return nil }

    // The stored buffer must be empty again whichever way this call exits;
    // otherwise a later call could vend the same elements a second time.
    defer { subsequence = [] }

    while let element = base.next() {
      // Ordinary elements, and separators seen after the split budget is
      // spent, simply become part of the subsequence under construction.
      guard separatorCount < maxSplits && isSeparator(element) else {
        subsequence.append(element)
        continue
      }

      // A separator with nothing collected before it would produce an empty
      // subsequence. When those are omitted, drop the separator and do not
      // charge it against `maxSplits`.
      if omittingEmptySubsequences && subsequence.isEmpty {
        continue
      }

      separatorCount += 1
      sequenceLength += 1
      return subsequence
    }

    // The base sequence is exhausted: what has been collected is the trailing
    // subsequence, which is skipped only if it is empty and empty
    // subsequences are omitted.
    if omittingEmptySubsequences && subsequence.isEmpty {
      return nil
    }

    sequenceLength += 1
    return subsequence
  }
}
129+
130+
extension LazySplitSequence: LazySequenceProtocol {
  /// Returns an iterator that splits the base sequence on demand.
  ///
  /// The iterator is seeded with a fresh iterator over `base` and with this
  /// sequence's separator predicate, split limit and empty-subsequence policy.
  public func makeIterator() -> Iterator {
    let source = base.makeIterator()
    return Iterator(
      base: source,
      whereSeparator: isSeparator,
      maxSplits: maxSplits,
      omittingEmptySubsequences: omittingEmptySubsequences
    )
  }
}
140+
141+
extension LazySequenceProtocol {
  /// Lazily returns the longest possible subsequences of the sequence, in order,
  /// that don't contain elements satisfying the given predicate.
  ///
  /// The resulting lazy sequence consists of at most `maxSplits + 1`
  /// subsequences, each vended as an array of elements. Elements that are used
  /// to split the sequence are not returned as part of any subsequence (except
  /// possibly the last one, in the case where `maxSplits` is less than the
  /// number of separators in the sequence).
  ///
  /// The following examples show the effects of the `maxSplits` and
  /// `omittingEmptySubsequences` parameters when lazily splitting a string
  /// using a closure that matches spaces. Each subsequence is an array of
  /// characters, so it is converted back to a string for printing. The first
  /// use of `split` returns each word that was originally separated by one or
  /// more spaces.
  ///
  ///     let line = "BLANCHE:   I don't want realism. I want magic!"
  ///     for spaceless in line.lazy.split(whereSeparator: { $0 == " " }) {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     // I
  ///     // don't
  ///     // want
  ///     // realism.
  ///     // I
  ///     // want
  ///     // magic!
  ///
  /// The second example passes `1` for the `maxSplits` parameter, so the
  /// original string is split just once, into two subsequences.
  ///
  ///     for spaceless in line.lazy.split(maxSplits: 1, whereSeparator: { $0 == " " }) {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     //   I don't want realism. I want magic!
  ///
  /// The final example passes `false` for the `omittingEmptySubsequences`
  /// parameter, so the resulting sequence contains empty subsequences where
  /// spaces were repeated.
  ///
  ///     for spaceless in line.lazy.split(omittingEmptySubsequences: false, whereSeparator: { $0 == " " }) {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     //
  ///     //
  ///     // I
  ///     // don't
  ///     // want
  ///     // realism.
  ///     // I
  ///     // want
  ///     // magic!
  ///
  /// - Parameters:
  ///   - maxSplits: The maximum number of times to split the sequence, or
  ///     one less than the number of subsequences to return. If
  ///     `maxSplits + 1` subsequences are returned, the last one is a suffix
  ///     of the original sequence containing the remaining elements.
  ///     `maxSplits` must be greater than or equal to zero. The default value
  ///     is `Int.max`.
  ///   - omittingEmptySubsequences: If `false`, an empty subsequence is
  ///     returned in the result for each pair of consecutive elements
  ///     satisfying the `isSeparator` predicate and for each element at the
  ///     start or end of the sequence satisfying the `isSeparator`
  ///     predicate. The default value is `true`.
  ///   - whereSeparator: A closure that takes an element as an argument and
  ///     returns a Boolean value indicating whether the sequence should be
  ///     split at that element.
  /// - Returns: A lazy sequence of subsequences, split from this sequence's
  ///   elements.
  ///
  /// - Complexity: O(*n*), where *n* is the length of the sequence.
  public func split(
    maxSplits: Int = Int.max,
    omittingEmptySubsequences: Bool = true,
    whereSeparator isSeparator: @escaping (Element) -> Bool
  ) -> LazySplitSequence<Elements> {
    // A negative split count is a programmer error; trap in release builds too.
    precondition(maxSplits >= 0, "Must take zero or more splits")

    // No element is visited here; the work happens as the result is iterated.
    return LazySplitSequence(
      base: elements,
      isSeparator: isSeparator,
      maxSplits: maxSplits,
      omittingEmptySubsequences: omittingEmptySubsequences
    )
  }
}
232+
233+
extension LazySequenceProtocol where Element: Equatable {
  /// Lazily returns the longest possible subsequences of the sequence, in order,
  /// around elements equal to the given element.
  ///
  /// The resulting lazy sequence consists of at most `maxSplits + 1`
  /// subsequences, each vended as an array of elements. Elements that are used
  /// to split the sequence are not returned as part of any subsequence (except
  /// possibly the last one, in the case where `maxSplits` is less than the
  /// number of separators in the sequence).
  ///
  /// The following examples show the effects of the `maxSplits` and
  /// `omittingEmptySubsequences` parameters when lazily splitting a string at
  /// each space character (" "). Each subsequence is an array of characters,
  /// so it is converted back to a string for printing. The first use of
  /// `split` returns each word that was originally separated by one or more
  /// spaces.
  ///
  ///     let line = "BLANCHE:   I don't want realism. I want magic!"
  ///     for spaceless in line.lazy.split(separator: " ") {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     // I
  ///     // don't
  ///     // want
  ///     // realism.
  ///     // I
  ///     // want
  ///     // magic!
  ///
  /// The second example passes `1` for the `maxSplits` parameter, so the
  /// original string is split just once, into two subsequences.
  ///
  ///     for spaceless in line.lazy.split(separator: " ", maxSplits: 1) {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     //   I don't want realism. I want magic!
  ///
  /// The final example passes `false` for the `omittingEmptySubsequences`
  /// parameter, so the resulting sequence contains empty subsequences where
  /// spaces were repeated.
  ///
  ///     for spaceless in line.lazy.split(separator: " ", omittingEmptySubsequences: false) {
  ///       print(String(spaceless))
  ///     }
  ///     // Prints
  ///     // BLANCHE:
  ///     //
  ///     //
  ///     // I
  ///     // don't
  ///     // want
  ///     // realism.
  ///     // I
  ///     // want
  ///     // magic!
  ///
  /// - Parameters:
  ///   - separator: The element that should be split upon.
  ///   - maxSplits: The maximum number of times to split the sequence, or
  ///     one less than the number of subsequences to return. If
  ///     `maxSplits + 1` subsequences are returned, the last one is a suffix
  ///     of the original sequence containing the remaining elements.
  ///     `maxSplits` must be greater than or equal to zero. The default value
  ///     is `Int.max`.
  ///   - omittingEmptySubsequences: If `false`, an empty subsequence is
  ///     returned in the result for each consecutive pair of `separator`
  ///     elements in the sequence and for each instance of `separator` at
  ///     the start or end of the sequence. If `true`, only nonempty
  ///     subsequences are returned. The default value is `true`.
  /// - Returns: A lazy sequence of subsequences, split from this sequence's
  ///   elements.
  ///
  /// - Complexity: O(*n*), where *n* is the length of the sequence.
  public func split(
    separator: Element,
    maxSplits: Int = Int.max,
    omittingEmptySubsequences: Bool = true
  ) -> LazySplitSequence<Elements> {
    // A negative split count is a programmer error; trap in release builds too.
    precondition(maxSplits >= 0, "Must take zero or more splits")

    // Equality with `separator` serves as the split predicate. No element is
    // visited here; the work happens as the result is iterated.
    return LazySplitSequence(
      base: elements,
      isSeparator: { $0 == separator },
      maxSplits: maxSplits,
      omittingEmptySubsequences: omittingEmptySubsequences
    )
  }
}

0 commit comments

Comments
 (0)