Skip to content

Commit f1a12b9

Browse files
committed
Fix algorithms overload resolution issues
This change addresses two overload resolution problems with the collection-based algorithm methods. First, when RegexBuilder is imported, `String` gains `RegexComponent` conformance, which means the `RegexComponent`-based overloads win for strings, which is undesirable. Second, if a collection has an element type that can be expressed as an array literal, collection-based methods get selected ahead of any standard library counterpart. These two problems combine in a tricky way for `split` and `contains`. For `split`, both the collection-based and regex-based versions need to be marked as `@_disfavoredOverload` so that the problems above can be resolved. Unfortunately, this sets up an ambiguity once `String` has `RegexComponent` conformance, so the `RegexBuilder` module includes separate overloads for `String` and `Substring` that act as tie-breakers. If introduced in the standard library, these would be a source-breaking change, as they would win over the `Element`- based split when referencing the `split` method, as with `let splitFunction = myString.split`. For `contains`, the same requirements hold, with the added complication that the Foundation overlay defines its own `String.contains(_:)` method with different behavior than included in these additions. For this reason, the more specific overloads for `String` and `Substring` can't live in the `RegexBuilder` module, which creates a problem for source compatibility. As it stands now, this existing code does not compile with the new algorithm methods added, as the type of `vowelPredicate` changes from `(Character) -> Bool` to `(String) -> Bool`: ``` let str = "abcde" let vowelPredicate = "aeiou".contains print(str.filter(vowelPredicate)) ```
1 parent ce09452 commit f1a12b9

File tree

8 files changed

+112
-22
lines changed

8 files changed

+112
-22
lines changed

Sources/RegexBuilder/Algorithms.swift

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
import _StringProcessing
12+
@_spi(RegexBuilder) import _StringProcessing
1313

1414
// FIXME(rdar://92459215): We should be using 'some RegexComponent' instead of
1515
// <R: RegexComponent> for the methods below that don't impose any additional
@@ -313,3 +313,31 @@ where Self: BidirectionalCollection, SubSequence == Substring {
313313
try replace(content(), maxReplacements: maxReplacements, with: replacement)
314314
}
315315
}
316+
317+
// String split overload breakers
318+
319+
extension StringProtocol where SubSequence == Substring {
320+
@available(SwiftStdlib 5.7, *)
321+
public func split(
322+
separator: String,
323+
maxSplits: Int = .max,
324+
omittingEmptySubsequences: Bool = true
325+
) -> [Substring] {
326+
return _split(
327+
separator: separator,
328+
maxSplits: maxSplits,
329+
omittingEmptySubsequences: omittingEmptySubsequences)
330+
}
331+
332+
@available(SwiftStdlib 5.7, *)
333+
public func split(
334+
separator: Substring,
335+
maxSplits: Int = .max,
336+
omittingEmptySubsequences: Bool = true
337+
) -> [Substring] {
338+
return _split(
339+
separator: separator,
340+
maxSplits: maxSplits,
341+
omittingEmptySubsequences: omittingEmptySubsequences)
342+
}
343+
}

Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ extension Collection {
1515
func contains<Searcher: CollectionSearcher>(
1616
_ searcher: Searcher
1717
) -> Bool where Searcher.Searched == Self {
18-
firstRange(of: searcher) != nil
18+
_firstRange(of: searcher) != nil
1919
}
2020
}
2121

@@ -27,6 +27,7 @@ extension Collection where Element: Equatable {
2727
/// - Parameter other: A sequence to search for within this collection.
2828
/// - Returns: `true` if the collection contains the specified sequence,
2929
/// otherwise `false`.
30+
@_disfavoredOverload
3031
@available(SwiftStdlib 5.7, *)
3132
public func contains<C: Collection>(_ other: C) -> Bool
3233
where C.Element == Element
@@ -68,6 +69,7 @@ extension BidirectionalCollection where SubSequence == Substring {
6869
/// - Parameter regex: A regex to search for within this collection.
6970
/// - Returns: `true` if the regex was found in the collection, otherwise
7071
/// `false`.
72+
@_disfavoredOverload
7173
@available(SwiftStdlib 5.7, *)
7274
public func contains<R: RegexComponent>(_ regex: R) -> Bool {
7375
contains(RegexConsumer(regex))

Sources/_StringProcessing/Algorithms/Algorithms/FirstRange.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// MARK: `CollectionSearcher` algorithms
1313

1414
extension Collection {
15-
func firstRange<S: CollectionSearcher>(
15+
func _firstRange<S: CollectionSearcher>(
1616
of searcher: S
1717
) -> Range<Index>? where S.Searched == Self {
1818
var state = searcher.state(for: self, in: startIndex..<endIndex)
@@ -75,9 +75,10 @@ extension BidirectionalCollection where SubSequence == Substring {
7575
/// - Parameter regex: The regex to search for.
7676
/// - Returns: A range in the collection of the first occurrence of `regex`.
7777
/// Returns `nil` if `regex` is not found.
78+
@_disfavoredOverload
7879
@available(SwiftStdlib 5.7, *)
7980
public func firstRange<R: RegexComponent>(of regex: R) -> Range<Index>? {
80-
firstRange(of: RegexConsumer(regex))
81+
_firstRange(of: RegexConsumer(regex))
8182
}
8283

8384
@available(SwiftStdlib 5.7, *)

Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ extension ReversedRangesCollection: Sequence {
157157
// MARK: `CollectionSearcher` algorithms
158158

159159
extension Collection {
160-
func ranges<S: CollectionSearcher>(
160+
func _ranges<S: CollectionSearcher>(
161161
of searcher: S
162162
) -> RangesCollection<S> where S.Searched == Self {
163163
RangesCollection(base: self, searcher: searcher)
@@ -178,7 +178,7 @@ extension Collection where Element: Equatable {
178178
func ranges<C: Collection>(
179179
of other: C
180180
) -> RangesCollection<ZSearcher<Self>> where C.Element == Element {
181-
ranges(of: ZSearcher(pattern: Array(other), by: ==))
181+
_ranges(of: ZSearcher(pattern: Array(other), by: ==))
182182
}
183183

184184
// FIXME: Return `some Collection<Range<Index>>` for SE-0346
@@ -191,7 +191,7 @@ extension Collection where Element: Equatable {
191191
public func ranges<C: Collection>(
192192
of other: C
193193
) -> [Range<Index>] where C.Element == Element {
194-
ranges(of: ZSearcher(pattern: Array(other), by: ==)).map { $0 }
194+
_ranges(of: ZSearcher(pattern: Array(other), by: ==)).map { $0 }
195195
}
196196
}
197197

@@ -212,7 +212,7 @@ extension BidirectionalCollection where Element: Comparable {
212212
) -> RangesCollection<PatternOrEmpty<TwoWaySearcher<Self>>>
213213
where C.Element == Element
214214
{
215-
ranges(of: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other))))
215+
_ranges(of: PatternOrEmpty(searcher: TwoWaySearcher(pattern: Array(other))))
216216
}
217217

218218
// FIXME
@@ -231,10 +231,10 @@ extension BidirectionalCollection where Element: Comparable {
231231
extension BidirectionalCollection where SubSequence == Substring {
232232
@available(SwiftStdlib 5.7, *)
233233
@_disfavoredOverload
234-
func ranges<R: RegexComponent>(
234+
func _ranges<R: RegexComponent>(
235235
of regex: R
236236
) -> RangesCollection<RegexConsumer<R, Self>> {
237-
ranges(of: RegexConsumer(regex))
237+
_ranges(of: RegexConsumer(regex))
238238
}
239239

240240
@available(SwiftStdlib 5.7, *)
@@ -251,10 +251,11 @@ extension BidirectionalCollection where SubSequence == Substring {
251251
/// - Parameter regex: The regex to search for.
252252
/// - Returns: A collection or ranges in the receiver of all occurrences of
253253
/// `regex`. Returns an empty collection if `regex` is not found.
254+
@_disfavoredOverload
254255
@available(SwiftStdlib 5.7, *)
255256
public func ranges<R: RegexComponent>(
256257
of regex: R
257258
) -> [Range<Index>] {
258-
Array(ranges(of: RegexConsumer(regex)))
259+
Array(_ranges(of: RegexConsumer(regex)))
259260
}
260261
}

Sources/_StringProcessing/Algorithms/Algorithms/Replace.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ extension RangeReplaceableCollection {
2626
var result = Self()
2727
result.append(contentsOf: self[..<index])
2828

29-
for range in self[subrange].ranges(of: searcher).prefix(maxReplacements) {
29+
for range in self[subrange]._ranges(of: searcher).prefix(maxReplacements) {
3030
result.append(contentsOf: self[index..<range.lowerBound])
3131
result.append(contentsOf: replacement)
3232
index = range.upperBound

Sources/_StringProcessing/Algorithms/Algorithms/Split.swift

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct SplitCollection<Searcher: CollectionSearcher> {
3434
maxSplits: Int,
3535
omittingEmptySubsequences: Bool)
3636
{
37-
self.ranges = base.ranges(of: searcher)
37+
self.ranges = base._ranges(of: searcher)
3838
self.maxSplits = maxSplits
3939
self.omittingEmptySubsequences = omittingEmptySubsequences
4040
}
@@ -307,13 +307,17 @@ extension Collection where Element: Equatable {
307307
/// - Parameter separator: The element to be split upon.
308308
/// - Returns: A collection of subsequences, split from this collection's
309309
/// elements.
310+
@_disfavoredOverload
310311
@available(SwiftStdlib 5.7, *)
311312
public func split<C: Collection>(
312313
separator: C,
313314
maxSplits: Int = .max,
314315
omittingEmptySubsequences: Bool = true
315316
) -> [SubSequence] where C.Element == Element {
316-
Array(split(by: ZSearcher(pattern: Array(separator), by: ==), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences))
317+
Array(split(
318+
by: ZSearcher(pattern: Array(separator), by: ==),
319+
maxSplits: maxSplits,
320+
omittingEmptySubsequences: omittingEmptySubsequences))
317321
}
318322
}
319323

@@ -352,6 +356,45 @@ extension BidirectionalCollection where Element: Comparable {
352356
// }
353357
}
354358

359+
// String split overload breakers
360+
//
361+
// These are underscored and marked as SPI so that the *actual* public overloads
362+
// are only visible in RegexBuilder, to avoid breaking source with the
363+
// standard library's function of the same name that takes a `Character`
364+
// as the separator. *Those* overloads are necessary as tie-breakers between
365+
// the Collection-based and Regex-based `split`s, which in turn are both marked
366+
// @_disfavoredOverload to avoid the wrong overload being selected when a
367+
// collection's element type could be used interchangably with a collection of
368+
// that element (e.g. `Array<OptionSet>.split(separator: [])`).
369+
370+
extension StringProtocol where SubSequence == Substring {
371+
@_spi(RegexBuilder)
372+
@available(SwiftStdlib 5.7, *)
373+
public func _split(
374+
separator: String,
375+
maxSplits: Int = .max,
376+
omittingEmptySubsequences: Bool = true
377+
) -> [Substring] {
378+
Array(split(
379+
by: ZSearcher(pattern: Array(separator), by: ==),
380+
maxSplits: maxSplits,
381+
omittingEmptySubsequences: omittingEmptySubsequences))
382+
}
383+
384+
@_spi(RegexBuilder)
385+
@available(SwiftStdlib 5.7, *)
386+
public func _split(
387+
separator: Substring,
388+
maxSplits: Int = .max,
389+
omittingEmptySubsequences: Bool = true
390+
) -> [Substring] {
391+
Array(split(
392+
by: ZSearcher(pattern: Array(separator), by: ==),
393+
maxSplits: maxSplits,
394+
omittingEmptySubsequences: omittingEmptySubsequences))
395+
}
396+
}
397+
355398
// MARK: Regex algorithms
356399

357400
@available(SwiftStdlib 5.7, *)
@@ -388,6 +431,9 @@ extension BidirectionalCollection where SubSequence == Substring {
388431
maxSplits: Int = .max,
389432
omittingEmptySubsequences: Bool = true
390433
) -> [SubSequence] {
391-
Array(split(by: RegexConsumer(separator), maxSplits: maxSplits, omittingEmptySubsequences: omittingEmptySubsequences))
434+
Array(split(
435+
by: RegexConsumer(separator),
436+
maxSplits: maxSplits,
437+
omittingEmptySubsequences: omittingEmptySubsequences))
392438
}
393439
}

Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ extension Collection where SubSequence == Self, Element: Equatable {
216216
}
217217

218218
extension RangeReplaceableCollection where Element: Equatable {
219-
@_disfavoredOverload
220219
/// Removes the initial elements that satisfy the given predicate from the
221220
/// start of the sequence.
222221
/// - Parameter predicate: A closure that takes an element of the sequence
@@ -290,6 +289,7 @@ extension BidirectionalCollection where SubSequence == Substring {
290289
/// - Parameter prefix: The collection to remove from this collection.
291290
/// - Returns: A collection containing the elements that does not match
292291
/// `prefix` from the start.
292+
@_disfavoredOverload
293293
@available(SwiftStdlib 5.7, *)
294294
public func trimmingPrefix<R: RegexComponent>(_ regex: R) -> SubSequence {
295295
trimmingPrefix(RegexConsumer(regex))

Tests/RegexTests/AlgorithmsTests.swift

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import _StringProcessing
1313
import XCTest
14+
//import RegexBuilder
1415

1516
// TODO: Protocol-powered testing
1617
class RegexConsumerTests: XCTestCase {
@@ -89,6 +90,17 @@ class AlgorithmTests: XCTestCase {
8990
// `Array<CountedOptionSet>`.
9091
XCTAssertFalse(cosArray.contains([]))
9192
XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 3)
93+
94+
// For these references to resolve to the `Element`-based stdlib function,
95+
// the `String`- and `Substring`-based `contains` functions need to be
96+
// marked as `@_disfavoredOverload`. However, that means that Foundation's
97+
// `String.contains` get selected instead, which has inconsistent behavior.
98+
99+
// Test that original `contains` functions are still accessible
100+
// let containsRef = "abcd".contains
101+
// XCTAssert(type(of: containsRef) == ((Character) -> Bool).self)
102+
// let containsParamsRef = "abcd".contains(_:)
103+
// XCTAssert(type(of: containsParamsRef) == ((Character) -> Bool).self)
92104
}
93105

94106
func testRegexRanges() {
@@ -201,12 +213,6 @@ class AlgorithmTests: XCTestCase {
201213

202214
XCTAssertEqual("".split(separator: ""), [])
203215
XCTAssertEqual("".split(separator: "", omittingEmptySubsequences: false), [""])
204-
205-
// Test that original `split` functions are still accessible
206-
let splitRef = "abcd".split
207-
XCTAssert(type(of: splitRef) == ((Character, Int, Bool) -> [Substring]).self)
208-
let splitParamsRef = "abcd".split(separator:maxSplits:omittingEmptySubsequences:)
209-
XCTAssert(type(of: splitParamsRef) == ((Character, Int, Bool) -> [Substring]).self)
210216
}
211217

212218
func testSplitSourceCompatibility() {
@@ -225,6 +231,12 @@ class AlgorithmTests: XCTestCase {
225231
// `Array<CountedOptionSet>`.
226232
XCTAssertEqual(cosArray.split(separator: []).count, 1)
227233
XCTAssertEqual(CountedOptionSet.arrayLiteralCreationCount, 3)
234+
235+
// Test that original `split` functions are still accessible
236+
let splitRef = "abcd".split
237+
XCTAssert(type(of: splitRef) == ((Character, Int, Bool) -> [Substring]).self)
238+
let splitParamsRef = "abcd".split(separator:maxSplits:omittingEmptySubsequences:)
239+
XCTAssert(type(of: splitParamsRef) == ((Character, Int, Bool) -> [Substring]).self)
228240
}
229241

230242
func testSplitPermutations() throws {

0 commit comments

Comments
 (0)