Skip to content

Commit d6a01e7

Browse files
committed
Add maxSplits and omitEmpty to split methods
This plumbs those parameters down into the SplitCollection type, and removes Collection conformance for now because (a) we aren't using it, and (b) it looks tricky to implement properly.
1 parent e0b4d5e commit d6a01e7

File tree

2 files changed

+251
-99
lines changed

2 files changed

+251
-99
lines changed

Sources/_StringProcessing/Algorithms/Algorithms/Split.swift

Lines changed: 168 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,28 @@ struct SplitCollection<Searcher: CollectionSearcher> {
1515
public typealias Base = Searcher.Searched
1616

1717
let ranges: RangesCollection<Searcher>
18-
19-
init(ranges: RangesCollection<Searcher>) {
18+
var maxSplits: Int
19+
var omittingEmptySubsequences: Bool
20+
21+
init(
22+
ranges: RangesCollection<Searcher>,
23+
maxSplits: Int,
24+
omittingEmptySubsequences: Bool)
25+
{
2026
self.ranges = ranges
27+
self.maxSplits = maxSplits
28+
self.omittingEmptySubsequences = omittingEmptySubsequences
2129
}
2230

23-
init(base: Base, searcher: Searcher) {
31+
init(
32+
base: Base,
33+
searcher: Searcher,
34+
maxSplits: Int,
35+
omittingEmptySubsequences: Bool)
36+
{
2437
self.ranges = base.ranges(of: searcher)
38+
self.maxSplits = maxSplits
39+
self.omittingEmptySubsequences = omittingEmptySubsequences
2540
}
2641
}
2742

@@ -30,97 +45,127 @@ extension SplitCollection: Sequence {
3045
let base: Base
3146
var index: Base.Index
3247
var ranges: RangesCollection<Searcher>.Iterator
33-
var isDone: Bool
34-
35-
init(ranges: RangesCollection<Searcher>) {
48+
var maxSplits: Int
49+
var omittingEmptySubsequences: Bool
50+
51+
var splitCounter = 0
52+
var isDone = false
53+
54+
init(
55+
ranges: RangesCollection<Searcher>,
56+
maxSplits: Int,
57+
omittingEmptySubsequences: Bool
58+
) {
3659
self.base = ranges.base
3760
self.index = base.startIndex
3861
self.ranges = ranges.makeIterator()
39-
self.isDone = false
62+
self.maxSplits = maxSplits
63+
self.omittingEmptySubsequences = omittingEmptySubsequences
4064
}
4165

4266
public mutating func next() -> Base.SubSequence? {
4367
guard !isDone else { return nil }
4468

45-
guard let range = ranges.next() else {
69+
/// Return the rest of base if it's non-empty or we're including
70+
/// empty subsequences.
71+
func finish() -> Base.SubSequence? {
4672
isDone = true
47-
return base[index...]
73+
return index == base.endIndex && omittingEmptySubsequences
74+
? nil
75+
: base[index...]
76+
}
77+
78+
if splitCounter >= maxSplits {
79+
return finish()
4880
}
4981

50-
defer { index = range.upperBound }
51-
return base[index..<range.lowerBound]
82+
while true {
83+
// If there are no more ranges that matched, return the rest of `base`.
84+
guard let range = ranges.next() else {
85+
return finish()
86+
}
87+
88+
defer { index = range.upperBound }
89+
90+
if omittingEmptySubsequences && index == range.lowerBound {
91+
continue
92+
}
93+
94+
splitCounter += 1
95+
return base[index..<range.lowerBound]
96+
}
5297
}
5398
}
5499

55100
public func makeIterator() -> Iterator {
56-
Iterator(ranges: ranges)
57-
}
58-
}
59-
60-
extension SplitCollection: Collection {
61-
public struct Index {
62-
var start: Base.Index
63-
var base: RangesCollection<Searcher>.Index
64-
var isEndIndex: Bool
65-
}
66-
67-
public var startIndex: Index {
68-
let base = ranges.startIndex
69-
return Index(start: ranges.base.startIndex, base: base, isEndIndex: false)
70-
}
71-
72-
public var endIndex: Index {
73-
Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true)
74-
}
75-
76-
public func formIndex(after index: inout Index) {
77-
guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") }
78-
79-
if let range = index.base.range {
80-
let newStart = range.upperBound
81-
ranges.formIndex(after: &index.base)
82-
index.start = newStart
83-
} else {
84-
index.isEndIndex = true
85-
}
86-
}
87-
88-
public func index(after index: Index) -> Index {
89-
var index = index
90-
formIndex(after: &index)
91-
return index
92-
}
93-
94-
public subscript(index: Index) -> Base.SubSequence {
95-
guard !index.isEndIndex else {
96-
fatalError("Cannot subscript using endIndex")
97-
}
98-
let end = index.base.range?.lowerBound ?? ranges.base.endIndex
99-
return ranges.base[index.start..<end]
101+
Iterator(ranges: ranges, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
100102
}
101103
}
102104

103-
extension SplitCollection.Index: Comparable {
104-
static func == (lhs: Self, rhs: Self) -> Bool {
105-
switch (lhs.isEndIndex, rhs.isEndIndex) {
106-
case (false, false):
107-
return lhs.start == rhs.start
108-
case (let lhs, let rhs):
109-
return lhs == rhs
110-
}
111-
}
112-
113-
static func < (lhs: Self, rhs: Self) -> Bool {
114-
switch (lhs.isEndIndex, rhs.isEndIndex) {
115-
case (true, _):
116-
return false
117-
case (_, true):
118-
return true
119-
case (false, false):
120-
return lhs.start < rhs.start
121-
}
122-
}
123-
}
105+
//extension SplitCollection: Collection {
106+
// public struct Index {
107+
// var start: Base.Index
108+
// var base: RangesCollection<Searcher>.Index
109+
// var isEndIndex: Bool
110+
// }
111+
//
112+
// public var startIndex: Index {
113+
// let base = ranges.startIndex
114+
// return Index(start: ranges.base.startIndex, base: base, isEndIndex: false)
115+
// }
116+
//
117+
// public var endIndex: Index {
118+
// Index(start: ranges.base.endIndex, base: ranges.endIndex, isEndIndex: true)
119+
// }
120+
//
121+
// public func formIndex(after index: inout Index) {
122+
// guard !index.isEndIndex else { fatalError("Cannot advance past endIndex") }
123+
//
124+
// if let range = index.base.range {
125+
// let newStart = range.upperBound
126+
// ranges.formIndex(after: &index.base)
127+
// index.start = newStart
128+
// } else {
129+
// index.isEndIndex = true
130+
// }
131+
// }
132+
//
133+
// public func index(after index: Index) -> Index {
134+
// var index = index
135+
// formIndex(after: &index)
136+
// return index
137+
// }
138+
//
139+
// public subscript(index: Index) -> Base.SubSequence {
140+
// guard !index.isEndIndex else {
141+
// fatalError("Cannot subscript using endIndex")
142+
// }
143+
// let end = index.base.range?.lowerBound ?? ranges.base.endIndex
144+
// return ranges.base[index.start..<end]
145+
// }
146+
//}
147+
//
148+
//extension SplitCollection.Index: Comparable {
149+
// static func == (lhs: Self, rhs: Self) -> Bool {
150+
// switch (lhs.isEndIndex, rhs.isEndIndex) {
151+
// case (false, false):
152+
// return lhs.start == rhs.start
153+
// case (let lhs, let rhs):
154+
// return lhs == rhs
155+
// }
156+
// }
157+
//
158+
// static func < (lhs: Self, rhs: Self) -> Bool {
159+
// switch (lhs.isEndIndex, rhs.isEndIndex) {
160+
// case (true, _):
161+
// return false
162+
// case (_, true):
163+
// return true
164+
// case (false, false):
165+
// return lhs.start < rhs.start
166+
// }
167+
// }
168+
//}
124169

125170
// MARK: `ReversedSplitCollection`
126171

@@ -176,10 +221,15 @@ extension ReversedSplitCollection: Sequence {
176221

177222
extension Collection {
178223
func split<Searcher: CollectionSearcher>(
179-
by separator: Searcher
224+
by separator: Searcher,
225+
maxSplits: Int,
226+
omittingEmptySubsequences: Bool
180227
) -> SplitCollection<Searcher> where Searcher.Searched == Self {
181-
// TODO: `maxSplits`, `omittingEmptySubsequences`?
182-
SplitCollection(base: self, searcher: separator)
228+
SplitCollection(
229+
base: self,
230+
searcher: separator,
231+
maxSplits: maxSplits,
232+
omittingEmptySubsequences: omittingEmptySubsequences)
183233
}
184234
}
185235

@@ -198,9 +248,11 @@ extension BidirectionalCollection {
198248
extension Collection {
199249
// TODO: Non-escaping and throwing
200250
func split(
201-
whereSeparator predicate: @escaping (Element) -> Bool
251+
whereSeparator predicate: @escaping (Element) -> Bool,
252+
maxSplits: Int,
253+
omittingEmptySubsequences: Bool
202254
) -> SplitCollection<PredicateConsumer<Self>> {
203-
split(by: PredicateConsumer(predicate: predicate))
255+
split(by: PredicateConsumer(predicate: predicate), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
204256
}
205257
}
206258

@@ -216,9 +268,11 @@ extension BidirectionalCollection where Element: Equatable {
216268

217269
extension Collection where Element: Equatable {
218270
func split(
219-
by separator: Element
271+
by separator: Element,
272+
maxSplits: Int,
273+
omittingEmptySubsequences: Bool
220274
) -> SplitCollection<PredicateConsumer<Self>> {
221-
split(whereSeparator: { $0 == separator })
275+
split(whereSeparator: { $0 == separator }, maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
222276
}
223277
}
224278

@@ -234,10 +288,12 @@ extension BidirectionalCollection where Element: Equatable {
234288

235289
extension Collection where Element: Equatable {
236290
@_disfavoredOverload
237-
func split<S: Sequence>(
238-
by separator: S
239-
) -> SplitCollection<ZSearcher<Self>> where S.Element == Element {
240-
split(by: ZSearcher(pattern: Array(separator), by: ==))
291+
func split<C: Collection>(
292+
by separator: C,
293+
maxSplits: Int,
294+
omittingEmptySubsequences: Bool
295+
) -> SplitCollection<ZSearcher<Self>> where C.Element == Element {
296+
split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
241297
}
242298

243299
// FIXME: Return `some Collection<SubSequence>` for SE-0346
@@ -247,10 +303,12 @@ extension Collection where Element: Equatable {
247303
/// - Returns: A collection of subsequences, split from this collection's
248304
/// elements.
249305
@available(SwiftStdlib 5.7, *)
250-
public func split<S: Sequence>(
251-
by separator: S
252-
) -> [SubSequence] where S.Element == Element {
253-
Array(split(by: ZSearcher(pattern: Array(separator), by: ==)))
306+
public func split<C: Collection>(
307+
separator: C,
308+
maxSplits: Int = .max,
309+
omittingEmptySubsequences: Bool = true
310+
) -> [SubSequence] where C.Element == Element {
311+
Array(split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences))
254312
}
255313
}
256314

@@ -267,12 +325,15 @@ extension BidirectionalCollection where Element: Equatable {
267325

268326
extension BidirectionalCollection where Element: Comparable {
269327
func split<C: Collection>(
270-
by separator: C
328+
by separator: C,
329+
maxSplits: Int,
330+
omittingEmptySubsequences: Bool
271331
) -> SplitCollection<PatternOrEmpty<TwoWaySearcher<Self>>>
272332
where C.Element == Element
273333
{
274334
split(
275-
by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator))))
335+
by: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(separator))),
336+
maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
276337
}
277338

278339
// FIXME
@@ -292,9 +353,11 @@ extension BidirectionalCollection where Element: Comparable {
292353
extension BidirectionalCollection where SubSequence == Substring {
293354
@_disfavoredOverload
294355
func split<R: RegexComponent>(
295-
by separator: R
356+
by separator: R,
357+
maxSplits: Int,
358+
omittingEmptySubsequences: Bool
296359
) -> SplitCollection<RegexConsumer<R, Self>> {
297-
split(by: RegexConsumer(separator))
360+
split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences)
298361
}
299362

300363
func splitFromBack<R: RegexComponent>(
@@ -303,15 +366,22 @@ extension BidirectionalCollection where SubSequence == Substring {
303366
splitFromBack(by: RegexConsumer(separator))
304367
}
305368

306-
// FIXME: Return `some Collection<Substring>` for SE-0346
369+
// TODO: Is this @_disfavoredOverload necessary?
370+
// It prevents split(separator: String) from choosing this overload instead
371+
// of the collection-based version when String has RegexComponent conformance
372+
373+
// FIXME: Return `some Collection<SubSequence>` for SE-0346
307374
/// Returns the longest possible subsequences of the collection, in order,
308375
/// around the ranges matched by the given separator regex.
309376
/// - Parameter separator: A regex describing elements to be split upon.
310377
/// - Returns: A collection of substrings, split from this collection's
311378
/// elements.
379+
@_disfavoredOverload
312380
public func split<R: RegexComponent>(
313-
by separator: R
381+
separator: R,
382+
maxSplits: Int = .max,
383+
omittingEmptySubsequences: Bool = true
314384
) -> [SubSequence] {
315-
Array(split(by: RegexConsumer(separator)))
385+
Array(split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences))
316386
}
317387
}

0 commit comments

Comments
 (0)