Skip to content

Commit 3c1d559

Browse files
authored
Merge pull request #23706 from milseman/5_1_gab_barg
[5.1][SE-0248] String Gaps
2 parents fbc1377 + 51d158b commit 3c1d559

25 files changed

+679
-392
lines changed

stdlib/public/Darwin/Foundation/NSRange.swift

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,17 +175,33 @@ extension Range where Bound == Int {
175175
}
176176

177177
extension Range where Bound == String.Index {
178-
public init?(_ range: NSRange, in string: __shared String) {
178+
private init?<S: StringProtocol>(
179+
_ range: NSRange, _genericIn string: __shared S
180+
) {
181+
// Availability of the generic `String.Index(_:within:)` used below
182+
guard #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) else {
183+
fatalError()
184+
}
179185
let u = string.utf16
180186
guard range.location != NSNotFound,
181-
let start = u.index(u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
182-
let end = u.index(u.startIndex, offsetBy: range.location + range.length, limitedBy: u.endIndex),
187+
let start = u.index(
188+
u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
189+
let end = u.index(
190+
start, offsetBy: range.length, limitedBy: u.endIndex),
183191
let lowerBound = String.Index(start, within: string),
184192
let upperBound = String.Index(end, within: string)
185193
else { return nil }
186-
194+
187195
self = lowerBound..<upperBound
188196
}
197+
198+
public init?(_ range: NSRange, in string: __shared String) {
199+
self.init(range, _genericIn: string)
200+
}
201+
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
202+
public init?<S: StringProtocol>(_ range: NSRange, in string: __shared S) {
203+
self.init(range, _genericIn: string)
204+
}
189205
}
190206

191207
extension NSRange : CustomReflectable {

stdlib/public/core/ASCII.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ extension Unicode.ASCII : Unicode.Encoding {
2323
return EncodedScalar(0x1a) // U+001A SUBSTITUTE; best we can do for ASCII
2424
}
2525

26+
/// Returns whether the given code unit represents an ASCII scalar.
27+
@_alwaysEmitIntoClient
28+
public static func isASCII(_ x: CodeUnit) -> Bool { return UTF8.isASCII(x) }
29+
2630
@inline(__always)
2731
@inlinable
2832
public static func _isScalar(_ x: CodeUnit) -> Bool {

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ set(SWIFTLIB_ESSENTIAL
9090
KeyValuePairs.swift
9191
LazyCollection.swift
9292
LazySequence.swift
93+
LegacyABI.swift
9394
LifetimeManager.swift
9495
ManagedBuffer.swift
9596
Map.swift

stdlib/public/core/Character.swift

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -88,25 +88,26 @@ extension Character {
8888
}
8989

9090
extension Character {
91-
@usableFromInline
92-
typealias UTF8View = String.UTF8View
91+
/// A view of a character's contents as a collection of UTF-8 code units. See
92+
/// `String.UTF8View` for more information.
93+
public typealias UTF8View = String.UTF8View
9394

95+
/// A UTF-8 encoding of `self`.
9496
@inlinable
95-
internal var utf8: UTF8View {
96-
return _str.utf8
97-
}
98-
@usableFromInline
99-
typealias UTF16View = String.UTF16View
97+
public var utf8: UTF8View { return _str.utf8 }
98+
99+
/// A view of a character's contents as a collection of UTF-16 code units. See
100+
/// `String.UTF16View` for more information.
101+
public typealias UTF16View = String.UTF16View
100102

103+
/// A UTF-16 encoding of `self`.
101104
@inlinable
102-
internal var utf16: UTF16View {
103-
return _str.utf16
104-
}
105+
public var utf16: UTF16View { return _str.utf16 }
106+
105107
public typealias UnicodeScalarView = String.UnicodeScalarView
108+
106109
@inlinable
107-
public var unicodeScalars: UnicodeScalarView {
108-
return _str.unicodeScalars
109-
}
110+
public var unicodeScalars: UnicodeScalarView { return _str.unicodeScalars }
110111
}
111112

112113
extension Character :

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@
223223
"Equatable.swift",
224224
"Comparable.swift",
225225
"Codable.swift",
226+
"LegacyABI.swift",
226227
"MigrationSupport.swift"
227228
],
228229
"Result": [

stdlib/public/core/LegacyABI.swift

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// This file contains non-API (or underscored) declarations that must be kept
14+
// around for ABI compatibility.
15+
16+
extension Unicode.UTF16 {
17+
@available(*, unavailable, renamed: "Unicode.UTF16.isASCII")
18+
@inlinable
19+
public static func _isASCII(_ x: CodeUnit) -> Bool {
20+
return Unicode.UTF16.isASCII(x)
21+
}
22+
}
23+
24+
@available(*, unavailable, renamed: "Unicode.UTF8.isASCII")
25+
@inlinable
26+
internal func _isASCII(_ x: UInt8) -> Bool {
27+
return Unicode.UTF8.isASCII(x)
28+
}
29+
30+
@available(*, unavailable, renamed: "Unicode.UTF8.isContinuation")
31+
@inlinable
32+
internal func _isContinuation(_ x: UInt8) -> Bool {
33+
return UTF8.isContinuation(x)
34+
}
35+
36+
extension Substring {
37+
@available(*, unavailable, renamed: "Substring.base")
38+
@inlinable
39+
internal var _wholeString: String { return base }
40+
}
41+

stdlib/public/core/StringComparison.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ private func _findBoundary(
239239
}
240240

241241
// Back up to scalar boundary
242-
while _isContinuation(utf8[_unchecked: idx]) {
242+
while UTF8.isContinuation(utf8[_unchecked: idx]) {
243243
idx &-= 1
244244
}
245245

stdlib/public/core/StringCreate.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ extension String {
193193
internal static func _fromSubstring(
194194
_ substring: __shared Substring
195195
) -> String {
196-
if substring._offsetRange == substring._wholeString._offsetRange {
197-
return substring._wholeString
196+
if substring._offsetRange == substring.base._offsetRange {
197+
return substring.base
198198
}
199199

200200
return String._copying(substring)

stdlib/public/core/StringIndexConversions.swift

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,60 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
extension String.Index {
14+
private init?<S: StringProtocol>(
15+
_ idx: String.Index, _genericWithin target: S
16+
) {
17+
guard target._wholeGuts.isOnGraphemeClusterBoundary(idx),
18+
idx >= target.startIndex && idx <= target.endIndex
19+
else {
20+
return nil
21+
}
22+
23+
self = idx
24+
}
25+
26+
/// Creates an index in the given string that corresponds exactly to the
27+
/// specified position.
28+
///
29+
/// If the index passed as `sourcePosition` represents the start of an
30+
/// extended grapheme cluster---the element type of a string---then the
31+
/// initializer succeeds.
32+
///
33+
/// The following example converts the position of the Unicode scalar `"e"`
34+
/// into its corresponding position in the string. The character at that
35+
/// position is the composed `"é"` character.
36+
///
37+
/// let cafe = "Cafe\u{0301}"
38+
/// print(cafe)
39+
/// // Prints "Café"
40+
///
41+
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
42+
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
43+
///
44+
/// print(cafe[...stringIndex])
45+
/// // Prints "Café"
46+
///
47+
/// If the index passed as `sourcePosition` doesn't have an exact
48+
/// corresponding position in `target`, the result of the initializer is
49+
/// `nil`. For example, an attempt to convert the position of the combining
50+
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
51+
/// their own position in a string.
52+
///
53+
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
54+
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
55+
///
56+
/// print(nextStringIndex)
57+
/// // Prints "nil"
58+
///
59+
/// - Parameters:
60+
/// - sourcePosition: A position in a view of the `target` parameter.
61+
/// `sourcePosition` must be a valid index of at least one of the views
62+
/// of `target`.
63+
/// - target: The string referenced by the resulting index.
64+
public init?(_ sourcePosition: String.Index, within target: String) {
65+
self.init(sourcePosition, _genericWithin: target)
66+
}
67+
1468
/// Creates an index in the given string that corresponds exactly to the
1569
/// specified position.
1670
///
@@ -49,14 +103,11 @@ extension String.Index {
49103
/// `sourcePosition` must be a valid index of at least one of the views
50104
/// of `target`.
51105
/// - target: The string referenced by the resulting index.
52-
public init?(
53-
_ sourcePosition: String.Index,
54-
within target: String
106+
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
107+
public init?<S: StringProtocol>(
108+
_ sourcePosition: String.Index, within target: S
55109
) {
56-
guard target._guts.isOnGraphemeClusterBoundary(sourcePosition) else {
57-
return nil
58-
}
59-
self = sourcePosition
110+
self.init(sourcePosition, _genericWithin: target)
60111
}
61112

62113
/// Returns the position in the given UTF-8 view that corresponds exactly to
@@ -81,7 +132,7 @@ extension String.Index {
81132
/// position of a UTF-16 trailing surrogate returns `nil`.
82133
public func samePosition(
83134
in utf8: String.UTF8View
84-
) -> String.UTF8View.Index? {
135+
) -> String.UTF8View.Index? {
85136
return String.UTF8View.Index(self, within: utf8)
86137
}
87138

stdlib/public/core/StringNormalization.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
108108
if index == 0 || index == count {
109109
return true
110110
}
111-
assert(!_isContinuation(self[_unchecked: index]))
111+
assert(!UTF8.isContinuation(self[_unchecked: index]))
112112

113113
// Sub-300 latiny fast-path
114114
if self[_unchecked: index] < 0xCC { return true }
@@ -165,7 +165,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
165165
_internalInvariant(index == count)
166166
return true
167167
}
168-
return !_isContinuation(self[index])
168+
return !UTF8.isContinuation(self[index])
169169
}
170170

171171
}

stdlib/public/core/StringUTF8Validation.swift

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,15 @@ private func _isNotOverlong_F0(_ x: UInt8) -> Bool {
77
}
88

99
private func _isNotOverlong_F4(_ x: UInt8) -> Bool {
10-
return _isContinuation(x) && x <= 0x8F
10+
return UTF8.isContinuation(x) && x <= 0x8F
1111
}
1212

1313
private func _isNotOverlong_E0(_ x: UInt8) -> Bool {
1414
return (0xA0...0xBF).contains(x)
1515
}
1616

1717
private func _isNotOverlong_ED(_ x: UInt8) -> Bool {
18-
return _isContinuation(x) && x <= 0x9F
19-
}
20-
21-
private func _isASCII_cmp(_ x: UInt8) -> Bool {
22-
return x <= 0x7F
18+
return UTF8.isContinuation(x) && x <= 0x9F
2319
}
2420

2521
internal struct UTF8ExtraInfo: Equatable {
@@ -48,7 +44,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
4844
guard f(cu) else { throw UTF8ValidationError() }
4945
}
5046
@inline(__always) func guaranteeContinuation() throws {
51-
try guaranteeIn(_isContinuation)
47+
try guaranteeIn(UTF8.isContinuation)
5248
}
5349

5450
func _legacyInvalidLengthCalculation(_ _buffer: (_storage: UInt32, ())) -> Int {
@@ -94,7 +90,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
9490
var endIndex = buf.startIndex
9591
var iter = buf.makeIterator()
9692
_ = iter.next()
97-
while let cu = iter.next(), !_isASCII(cu) && !_isUTF8MultiByteLeading(cu) {
93+
while let cu = iter.next(), UTF8.isContinuation(cu) {
9894
endIndex += 1
9995
}
10096
let illegalRange = Range(buf.startIndex...endIndex)
@@ -107,7 +103,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
107103
do {
108104
var isASCII = true
109105
while let cu = iter.next() {
110-
if _isASCII(cu) { lastValidIndex &+= 1; continue }
106+
if UTF8.isASCII(cu) { lastValidIndex &+= 1; continue }
111107
isASCII = false
112108
if _slowPath(!_isUTF8MultiByteLeading(cu)) {
113109
throw UTF8ValidationError()

stdlib/public/core/StringUTF8View.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ extension String.UTF8View {
415415

416416
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
417417
startingAt: i.strippingTranscoding)
418-
let utf8Len = _numUTF8CodeUnits(scalar)
418+
let utf8Len = UTF8.width(scalar)
419419

420420
if utf8Len == 1 {
421421
_internalInvariant(i.transcodedOffset == 0)
@@ -442,7 +442,7 @@ extension String.UTF8View {
442442

443443
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
444444
endingAt: i)
445-
let utf8Len = _numUTF8CodeUnits(scalar)
445+
let utf8Len = UTF8.width(scalar)
446446
return i.encoded(offsetBy: -scalarLen).transcoded(withOffset: utf8Len &- 1)
447447
}
448448

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ extension String.UnicodeScalarView {
418418
internal func _foreignIndex(after i: Index) -> Index {
419419
_internalInvariant(_guts.isForeign)
420420
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
421-
let len = _isLeadingSurrogate(cu) ? 2 : 1
421+
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1
422422

423423
return i.encoded(offsetBy: len)
424424
}
@@ -429,7 +429,7 @@ extension String.UnicodeScalarView {
429429
_internalInvariant(_guts.isForeign)
430430
let priorIdx = i.priorEncoded
431431
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
432-
let len = _isTrailingSurrogate(cu) ? 2 : 1
432+
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1
433433

434434
return i.encoded(offsetBy: -len)
435435
}

0 commit comments

Comments
 (0)