Skip to content

[5.1][SE-0248] String Gaps #23706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions stdlib/public/Darwin/Foundation/NSRange.swift
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,33 @@ extension Range where Bound == Int {
}

extension Range where Bound == String.Index {
public init?(_ range: NSRange, in string: __shared String) {
private init?<S: StringProtocol>(
_ range: NSRange, _genericIn string: __shared S
) {
// Availability must match the stdlib release that provides the generic
// `String.Index.init(_:within:)` called below.
guard #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) else {
fatalError()
}
let u = string.utf16
guard range.location != NSNotFound,
let start = u.index(u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
let end = u.index(u.startIndex, offsetBy: range.location + range.length, limitedBy: u.endIndex),
let start = u.index(
u.startIndex, offsetBy: range.location, limitedBy: u.endIndex),
let end = u.index(
start, offsetBy: range.length, limitedBy: u.endIndex),
let lowerBound = String.Index(start, within: string),
let upperBound = String.Index(end, within: string)
else { return nil }

self = lowerBound..<upperBound
}

/// Creates a range of string indices equivalent to the given `NSRange`,
/// resolved against `string`.
///
/// Forwards to the private generic initializer `init(_:_genericIn:)`, which
/// measures `range` in UTF-16 code units of `string`.
///
/// - Parameters:
///   - range: The `NSRange` to convert.
///   - string: The string in which `range` is resolved.
public init?(_ range: NSRange, in string: __shared String) {
self.init(range, _genericIn: string)
}
/// Creates a range of string indices equivalent to the given `NSRange`,
/// resolved against any `StringProtocol` value.
///
/// Forwards to the same private generic initializer as the `String`
/// overload.
///
/// NOTE(review): the `9999` version numbers look like placeholders for a
/// not-yet-assigned OS release — confirm before shipping.
///
/// - Parameters:
///   - range: The `NSRange` to convert.
///   - string: The string or substring in which `range` is resolved.
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
public init?<S: StringProtocol>(_ range: NSRange, in string: __shared S) {
self.init(range, _genericIn: string)
}
}

extension NSRange : CustomReflectable {
Expand Down
4 changes: 4 additions & 0 deletions stdlib/public/core/ASCII.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ extension Unicode.ASCII : Unicode.Encoding {
return EncodedScalar(0x1a) // U+001A SUBSTITUTE; best we can do for ASCII
}

/// Returns whether the given code unit represents an ASCII scalar.
///
/// Forwards to `UTF8.isASCII(_:)`.
///
/// - Parameter x: The code unit to test.
@_alwaysEmitIntoClient
public static func isASCII(_ x: CodeUnit) -> Bool { return UTF8.isASCII(x) }

@inline(__always)
@inlinable
public static func _isScalar(_ x: CodeUnit) -> Bool {
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ set(SWIFTLIB_ESSENTIAL
KeyValuePairs.swift
LazyCollection.swift
LazySequence.swift
LegacyABI.swift
LifetimeManager.swift
ManagedBuffer.swift
Map.swift
Expand Down
27 changes: 14 additions & 13 deletions stdlib/public/core/Character.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,26 @@ extension Character {
}

extension Character {
@usableFromInline
typealias UTF8View = String.UTF8View
/// A view of a character's contents as a collection of UTF-8 code units. See
/// `String.UTF8View` for more information.
public typealias UTF8View = String.UTF8View

/// A UTF-8 encoding of `self`.
@inlinable
internal var utf8: UTF8View {
return _str.utf8
}
@usableFromInline
typealias UTF16View = String.UTF16View
public var utf8: UTF8View { return _str.utf8 }

/// A view of a character's contents as a collection of UTF-16 code units. See
/// `String.UTF16View` for more information.
public typealias UTF16View = String.UTF16View

/// A UTF-16 encoding of `self`.
@inlinable
internal var utf16: UTF16View {
return _str.utf16
}
public var utf16: UTF16View { return _str.utf16 }

public typealias UnicodeScalarView = String.UnicodeScalarView

@inlinable
public var unicodeScalars: UnicodeScalarView {
return _str.unicodeScalars
}
public var unicodeScalars: UnicodeScalarView { return _str.unicodeScalars }
}

extension Character :
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/GroupInfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@
"Equatable.swift",
"Comparable.swift",
"Codable.swift",
"LegacyABI.swift",
"MigrationSupport.swift"
],
"Result": [
Expand Down
41 changes: 41 additions & 0 deletions stdlib/public/core/LegacyABI.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// This file contains non-API (or underscored) declarations that must be kept
// around for ABI compatibility.

extension Unicode.UTF16 {
// Underscored predecessor of `Unicode.UTF16.isASCII`. It is marked
// unavailable (with a rename hint) so source code can no longer call it,
// and its body simply forwards to the renamed API.
@available(*, unavailable, renamed: "Unicode.UTF16.isASCII")
@inlinable
public static func _isASCII(_ x: CodeUnit) -> Bool {
return Unicode.UTF16.isASCII(x)
}
}

// Underscored free-function predecessor of `Unicode.UTF8.isASCII`. It is
// marked unavailable (with a rename hint) and forwards to the renamed API.
@available(*, unavailable, renamed: "Unicode.UTF8.isASCII")
@inlinable
internal func _isASCII(_ x: UInt8) -> Bool {
return Unicode.UTF8.isASCII(x)
}

// Underscored free-function predecessor of `Unicode.UTF8.isContinuation`. It
// is marked unavailable (with a rename hint) and forwards to the renamed API.
@available(*, unavailable, renamed: "Unicode.UTF8.isContinuation")
@inlinable
internal func _isContinuation(_ x: UInt8) -> Bool {
return UTF8.isContinuation(x)
}

extension Substring {
// Underscored predecessor of `Substring.base`. It is marked unavailable
// (with a rename hint) and simply returns `base`.
@available(*, unavailable, renamed: "Substring.base")
@inlinable
internal var _wholeString: String { return base }
}

2 changes: 1 addition & 1 deletion stdlib/public/core/StringComparison.swift
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private func _findBoundary(
}

// Back up to scalar boundary
while _isContinuation(utf8[_unchecked: idx]) {
while UTF8.isContinuation(utf8[_unchecked: idx]) {
idx &-= 1
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringCreate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ extension String {
internal static func _fromSubstring(
_ substring: __shared Substring
) -> String {
if substring._offsetRange == substring._wholeString._offsetRange {
return substring._wholeString
if substring._offsetRange == substring.base._offsetRange {
return substring.base
}

return String._copying(substring)
Expand Down
67 changes: 59 additions & 8 deletions stdlib/public/core/StringIndexConversions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,60 @@
//===----------------------------------------------------------------------===//

extension String.Index {
/// Shared implementation behind the public `init(_:within:)` overloads.
///
/// Succeeds only when `idx` lies on a grapheme-cluster boundary of the
/// storage backing `target` and falls within `target`'s own bounds
/// (`startIndex` through `endIndex`, both inclusive).
private init?<S: StringProtocol>(
  _ idx: String.Index, _genericWithin target: S
) {
  // First check: positions inside a grapheme cluster have no exact
  // counterpart in `target`.
  guard target._wholeGuts.isOnGraphemeClusterBoundary(idx) else {
    return nil
  }

  // Second check: the position must not fall outside `target` itself.
  guard idx >= target.startIndex, idx <= target.endIndex else {
    return nil
  }

  self = idx
}

/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
/// If the index passed as `sourcePosition` represents the start of an
/// extended grapheme cluster---the element type of a string---then the
/// initializer succeeds.
///
/// The following example converts the position of the Unicode scalar `"e"`
/// into its corresponding position in the string. The character at that
/// position is the composed `"é"` character.
///
/// let cafe = "Cafe\u{0301}"
/// print(cafe)
/// // Prints "Café"
///
/// let scalarsIndex = cafe.unicodeScalars.firstIndex(of: "e")!
/// let stringIndex = String.Index(scalarsIndex, within: cafe)!
///
/// print(cafe[...stringIndex])
/// // Prints "Café"
///
/// If the index passed as `sourcePosition` doesn't have an exact
/// corresponding position in `target`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the combining
/// acute accent (`"\u{0301}"`) fails. Combining Unicode scalars do not have
/// their own position in a string.
///
/// let nextScalarsIndex = cafe.unicodeScalars.index(after: scalarsIndex)
/// let nextStringIndex = String.Index(nextScalarsIndex, within: cafe)
///
/// print(nextStringIndex)
/// // Prints "nil"
///
/// - Parameters:
/// - sourcePosition: A position in a view of the `target` parameter.
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(_ sourcePosition: String.Index, within target: String) {
// Forward to the private generic initializer, which performs the
// grapheme-cluster-boundary and bounds checks.
self.init(sourcePosition, _genericWithin: target)
}

/// Creates an index in the given string that corresponds exactly to the
/// specified position.
///
Expand Down Expand Up @@ -49,14 +103,11 @@ extension String.Index {
/// `sourcePosition` must be a valid index of at least one of the views
/// of `target`.
/// - target: The string referenced by the resulting index.
public init?(
_ sourcePosition: String.Index,
within target: String
@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
public init?<S: StringProtocol>(
_ sourcePosition: String.Index, within target: S
) {
guard target._guts.isOnGraphemeClusterBoundary(sourcePosition) else {
return nil
}
self = sourcePosition
self.init(sourcePosition, _genericWithin: target)
}

/// Returns the position in the given UTF-8 view that corresponds exactly to
Expand All @@ -81,7 +132,7 @@ extension String.Index {
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringNormalization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
if index == 0 || index == count {
return true
}
assert(!_isContinuation(self[_unchecked: index]))
assert(!UTF8.isContinuation(self[_unchecked: index]))

// Sub-300 latiny fast-path
if self[_unchecked: index] < 0xCC { return true }
Expand Down Expand Up @@ -165,7 +165,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
_internalInvariant(index == count)
return true
}
return !_isContinuation(self[index])
return !UTF8.isContinuation(self[index])
}

}
Expand Down
14 changes: 5 additions & 9 deletions stdlib/public/core/StringUTF8Validation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,15 @@ private func _isNotOverlong_F0(_ x: UInt8) -> Bool {
}

private func _isNotOverlong_F4(_ x: UInt8) -> Bool {
return _isContinuation(x) && x <= 0x8F
return UTF8.isContinuation(x) && x <= 0x8F
}

private func _isNotOverlong_E0(_ x: UInt8) -> Bool {
return (0xA0...0xBF).contains(x)
}

private func _isNotOverlong_ED(_ x: UInt8) -> Bool {
return _isContinuation(x) && x <= 0x9F
}

private func _isASCII_cmp(_ x: UInt8) -> Bool {
return x <= 0x7F
return UTF8.isContinuation(x) && x <= 0x9F
}

internal struct UTF8ExtraInfo: Equatable {
Expand Down Expand Up @@ -48,7 +44,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
guard f(cu) else { throw UTF8ValidationError() }
}
@inline(__always) func guaranteeContinuation() throws {
try guaranteeIn(_isContinuation)
try guaranteeIn(UTF8.isContinuation)
}

func _legacyInvalidLengthCalculation(_ _buffer: (_storage: UInt32, ())) -> Int {
Expand Down Expand Up @@ -94,7 +90,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
var endIndex = buf.startIndex
var iter = buf.makeIterator()
_ = iter.next()
while let cu = iter.next(), !_isASCII(cu) && !_isUTF8MultiByteLeading(cu) {
while let cu = iter.next(), UTF8.isContinuation(cu) {
endIndex += 1
}
let illegalRange = Range(buf.startIndex...endIndex)
Expand All @@ -107,7 +103,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
do {
var isASCII = true
while let cu = iter.next() {
if _isASCII(cu) { lastValidIndex &+= 1; continue }
if UTF8.isASCII(cu) { lastValidIndex &+= 1; continue }
isASCII = false
if _slowPath(!_isUTF8MultiByteLeading(cu)) {
throw UTF8ValidationError()
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringUTF8View.swift
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ extension String.UTF8View {

let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
startingAt: i.strippingTranscoding)
let utf8Len = _numUTF8CodeUnits(scalar)
let utf8Len = UTF8.width(scalar)

if utf8Len == 1 {
_internalInvariant(i.transcodedOffset == 0)
Expand All @@ -442,7 +442,7 @@ extension String.UTF8View {

let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
endingAt: i)
let utf8Len = _numUTF8CodeUnits(scalar)
let utf8Len = UTF8.width(scalar)
return i.encoded(offsetBy: -scalarLen).transcoded(withOffset: utf8Len &- 1)
}

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringUnicodeScalarView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ extension String.UnicodeScalarView {
internal func _foreignIndex(after i: Index) -> Index {
_internalInvariant(_guts.isForeign)
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
let len = _isLeadingSurrogate(cu) ? 2 : 1
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: len)
}
Expand All @@ -429,7 +429,7 @@ extension String.UnicodeScalarView {
_internalInvariant(_guts.isForeign)
let priorIdx = i.priorEncoded
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
let len = _isTrailingSurrogate(cu) ? 2 : 1
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: -len)
}
Expand Down
Loading