Skip to content

Commit 0d46e4d

Browse files
committed
Optimize splitting of whitespace runs.
According to Time Profiler, a great deal of time was spent in the `Collection.split` function, specifically allocating and deallocating the small arrays that held the newline-delimited whitespace runs. This has been replaced by a hand-written sequence that lazily computes the runs and avoids all those allocations. Even doing two passes over the whitespace (first to compute the number of runs, then to lazily compute the runs themselves) is significantly faster than hitting the heap.
1 parent 50b2897 commit 0d46e4d

File tree

3 files changed

+317
-224
lines changed

3 files changed

+317
-224
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/// A sequence that lazily computes the longest possible subsequences of a collection, in order,
/// around elements equal to a specific element.
///
/// Empty subsequences are preserved: adjacent separators, or a separator at the very start or end
/// of the base collection, each produce an empty subsequence. As a result, the sequence always
/// yields exactly one more subsequence than the number of separator elements in the base
/// collection, and an empty base collection yields a single empty subsequence.
struct LazySplitSequence<Base: Collection>: Sequence where Base.Element: Equatable {
  /// The base collection.
  private let base: Base

  /// The element around which to split.
  private let separator: Base.Element

  /// The number of subsequences, which is precomputed when the sequence is initialized.
  ///
  /// This is always one more than the number of elements in the base collection that are equal to
  /// the separator.
  let count: Int

  /// The precomputed number of subsequences; this is the exact number of elements that the
  /// iterator will produce, not merely a lower bound.
  var underestimatedCount: Int {
    return count
  }

  /// Creates a new sequence that lazily computes the longest possible subsequences of a collection,
  /// in order, around elements equal to the given element.
  ///
  /// Initialization makes one full pass over `base` to count the separators; the subsequences
  /// themselves are only computed later, during iteration.
  ///
  /// - Parameters:
  ///   - base: The collection to split.
  ///   - separator: The element around which to split.
  fileprivate init(base: Base, separator: Base.Element) {
    self.base = base
    self.separator = separator

    // Precompute the number of subsequences. Even a collection with no separators (including an
    // empty one) produces a single subsequence, so start counting from one.
    var count = 1
    for element in base where element == separator {
      count += 1
    }
    self.count = count
  }

  /// Returns an iterator that computes each subsequence on demand.
  func makeIterator() -> Iterator {
    return Iterator(base: base, separator: separator)
  }

  /// An iterator that walks the base collection once, yielding one subsequence per call to
  /// `next()`.
  struct Iterator: IteratorProtocol {
    /// The collection being split.
    private let base: Base

    /// The element around which to split.
    private let separator: Base.Element

    /// The start index of the next subsequence to be computed.
    private var subSequenceStart: Base.Index

    /// The end index of the base collection.
    private let endIndex: Base.Index

    /// Indicates whether the last subsequence has been computed.
    private var done: Bool

    /// Creates an iterator positioned at the start of the given collection.
    ///
    /// - Parameters:
    ///   - base: The collection to split.
    ///   - separator: The element around which to split.
    init(base: Base, separator: Base.Element) {
      self.base = base
      self.separator = separator

      self.subSequenceStart = base.startIndex
      self.endIndex = base.endIndex
      self.done = false
    }

    /// Returns the next subsequence, or `nil` once the final subsequence has been returned.
    ///
    /// The returned slices never contain the separator itself.
    mutating func next() -> Base.SubSequence? {
      guard !done else { return nil }

      let remainder = base[subSequenceStart..<endIndex]
      guard let separatorIndex = remainder.firstIndex(of: separator) else {
        // No separators remain, so whatever is left (possibly nothing) is the final subsequence.
        done = true
        return remainder
      }

      let subSequence = base[subSequenceStart..<separatorIndex]
      // Resume just past the separator so that it is excluded from the next subsequence.
      subSequenceStart = base.index(after: separatorIndex)
      return subSequence
    }
  }
}
93+
94+
extension Collection where Element: Equatable {
  /// Returns a `Sequence` that lazily computes the longest possible subsequences of the collection,
  /// in order, around elements equal to the given element.
  ///
  /// Unlike the default behavior of `split(separator:)`, empty subsequences are not omitted:
  /// consecutive separators, as well as leading or trailing separators, each contribute an empty
  /// subsequence to the result.
  ///
  /// - Parameter separator: The element that should be split upon.
  /// - Returns: A sequence of subsequences, split from this collection’s elements.
  func lazilySplit(separator: Element) -> LazySplitSequence<Self> {
    return LazySplitSequence(base: self, separator: separator)
  }
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/// An iterator that persistently remembers the most recent element returned by `next()`.
struct RememberingIterator<Base: IteratorProtocol>: IteratorProtocol {
  /// The wrapped iterator.
  private var base: Base

  /// The element most recently returned by the `next()` method.
  ///
  /// This value will always remain equal to the last non-nil element returned by `next()`, even if
  /// multiple calls to `next()` are made that return nil after the iterator has been exhausted.
  /// Therefore, this property only evaluates to `nil` if `next()` has not yet returned an element,
  /// either because it has not been called or because the iterator had no elements in the first
  /// place.
  private(set) var latestElement: Base.Element?

  /// Creates a new remembering iterator that wraps the specified iterator.
  ///
  /// - Parameter base: The iterator whose elements should be forwarded and remembered.
  init(_ base: Base) {
    self.base = base
    self.latestElement = nil
  }

  /// Advances the wrapped iterator and returns its next element, recording that element in
  /// `latestElement`.
  ///
  /// - Returns: The next element of the wrapped iterator, or `nil` if it has none left.
  mutating func next() -> Base.Element? {
    // Leave `latestElement` untouched on exhaustion so the last real element stays available.
    guard let element = base.next() else { return nil }
    latestElement = element
    return element
  }
}

0 commit comments

Comments
 (0)