Skip to content

[5.8][stdlib] Expose index rounding entry points #62860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 182 additions & 1 deletion stdlib/public/core/StringIndexConversions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -177,3 +177,184 @@ extension String.Index {
}
}

extension String {
  /// Returns the largest valid index in `self` that does not exceed the given
  /// position.
  ///
  ///     let cafe = "Cafe\u{301}" // "Café"
  ///     let accent = cafe.unicodeScalars.firstIndex(of: "\u{301}")!
  ///     let char = cafe._index(roundingDown: accent)
  ///     print(cafe[char]) // "é"
  ///
  /// `String` methods such as `index(after:)` and `distance(from:to:)`
  /// implicitly round their input indices down to the nearest valid index:
  ///
  ///     let i = cafe.index(before: char)
  ///     let j = cafe.index(before: accent)
  ///     print(cafe[i], cafe[j]) // "f f"
  ///     print(i == j) // true
  ///
  /// This operation lets you perform this rounding yourself. For example, this
  /// can be used to safely check if `index(before:)` would consider some
  /// arbitrary index equivalent to the start index before calling it.
  ///
  /// - Parameter i: An index that is valid in at least one view of this string.
  /// - Returns: The largest valid index within this string that doesn't exceed
  ///   `i`.
  @available(SwiftStdlib 5.8, *)
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    _guts.validateInclusiveCharacterIndex(i)
  }
}

extension Substring {
  /// Rounds the given position down to the largest valid index in `self` that
  /// does not exceed it.
  ///
  /// `Substring` methods such as `index(after:)` and `distance(from:to:)`
  /// perform this rounding implicitly on the indices they are given. Call
  /// this method to do the rounding explicitly; for example, to safely find
  /// out whether `index(before:)` would treat an arbitrary index as
  /// equivalent to the start index before you call it.
  ///
  /// - Parameter i: An index that is valid in at least one view of this
  ///   substring.
  /// - Returns: The largest valid index within this substring that doesn't
  ///   exceed `i`.
  @available(SwiftStdlib 5.8, *)
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    // Validation is performed against the whole underlying string storage,
    // restricted to the bounds of this substring.
    return _wholeGuts.validateInclusiveCharacterIndex(i, in: _bounds)
  }
}

extension String.UnicodeScalarView {
  /// Rounds the given position down to the largest valid index in `self` that
  /// does not exceed it.
  ///
  /// Methods such as `index(after:)` and `distance(from:to:)` perform this
  /// rounding implicitly on the indices they are given. Call this method to
  /// do the rounding explicitly; for example, to safely find out whether
  /// `index(before:)` would treat an arbitrary index as equivalent to the
  /// start index before you call it.
  ///
  /// - Parameter i: An index that is valid in at least one view of the string
  ///   shared by this view.
  /// - Returns: The largest valid index within this view that doesn't exceed
  ///   `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    return _guts.validateInclusiveScalarIndex(i)
  }
}

extension Substring.UnicodeScalarView {
  /// Rounds the given position down to the largest valid index in `self` that
  /// does not exceed it.
  ///
  /// Methods such as `index(after:)` and `distance(from:to:)` perform this
  /// rounding implicitly on the indices they are given. Call this method to
  /// do the rounding explicitly; for example, to safely find out whether
  /// `index(before:)` would treat an arbitrary index as equivalent to the
  /// start index before you call it.
  ///
  /// - Parameter i: An index that is valid in at least one view of the
  ///   substring shared by this view.
  /// - Returns: The largest valid index within this view that doesn't exceed
  ///   `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    // Validation is performed against the whole underlying string storage,
    // restricted to the bounds of this view.
    return _wholeGuts.validateInclusiveScalarIndex(i, in: _bounds)
  }
}

extension String.UTF8View {
  /// Returns the largest valid index in `self` that does not exceed the given
  /// position.
  ///
  /// Methods such as `index(after:)` and `distance(from:to:)` implicitly round
  /// their input indices down to the nearest valid index. This operation lets
  /// you perform this rounding yourself. For example, this can be used to
  /// safely check if `index(before:)` would consider some arbitrary index
  /// equivalent to the start index before calling it.
  ///
  /// - Parameter i: An index that is valid in at least one view of the string
  ///   shared by this view.
  /// - Returns: The largest valid index within this view that doesn't exceed
  ///   `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    let i = _guts.validateInclusiveSubscalarIndex(i)
    // Only foreign strings are passed on to `_utf8AlignForeignIndex`; for all
    // other strings the validated index is returned directly through
    // `strippingTranscoding._knownUTF8`.
    guard _guts.isForeign else { return i.strippingTranscoding._knownUTF8 }
    return _utf8AlignForeignIndex(i)
  }
}

extension Substring.UTF8View {
  /// Rounds the given position down to the largest valid index in `self` that
  /// does not exceed it.
  ///
  /// Methods such as `index(after:)` and `distance(from:to:)` perform this
  /// rounding implicitly on the indices they are given. Call this method to
  /// do the rounding explicitly; for example, to safely find out whether
  /// `index(before:)` would treat an arbitrary index as equivalent to the
  /// start index before you call it.
  ///
  /// - Parameter i: An index that is valid in at least one view of the
  ///   substring shared by this view.
  /// - Returns: The largest valid index within this view that doesn't exceed
  ///   `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    let validated = _wholeGuts.validateInclusiveSubscalarIndex(i, in: _bounds)
    if _wholeGuts.isForeign {
      // Foreign storage: delegate alignment to the base string's UTF-8 view.
      return _slice._base._utf8AlignForeignIndex(validated)
    }
    return validated.strippingTranscoding._knownUTF8
  }
}

extension String.UTF16View {
  /// Returns the valid index in `self` that this view considers equivalent to
  /// the given index.
  ///
  /// Indices in the UTF-8 view that address positions between Unicode scalars
  /// are rounded down to the nearest scalar boundary; other indices are left as
  /// is.
  ///
  /// - Parameter i: An index that is valid in at least one view of the string
  ///   shared by this view.
  /// - Returns: The valid index in `self` that this view considers equivalent
  ///   to `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    let i = _guts.validateInclusiveSubscalarIndex(i)
    // Foreign strings return the validated index directly through
    // `strippingTranscoding._knownUTF16`; all other strings are passed on to
    // `_utf16AlignNativeIndex`.
    if _guts.isForeign { return i.strippingTranscoding._knownUTF16 }
    return _utf16AlignNativeIndex(i)
  }
}

extension Substring.UTF16View {
  /// Maps the given index to the valid index in `self` that this view treats
  /// as equivalent to it.
  ///
  /// Indices in the UTF-8 view that address positions between Unicode scalars
  /// are rounded down to the nearest scalar boundary; other indices are left as
  /// is.
  ///
  /// - Parameter i: An index that is valid in at least one view of the
  ///   substring shared by this view.
  /// - Returns: The valid index in `self` that this view considers equivalent
  ///   to `i`.
  @_alwaysEmitIntoClient
  public // SPI(Foundation) FIXME: This should be API
  func _index(roundingDown i: Index) -> Index {
    let validated = _wholeGuts.validateInclusiveSubscalarIndex(i, in: _bounds)
    guard !_wholeGuts.isForeign else {
      return validated.strippingTranscoding._knownUTF16
    }
    // Non-foreign storage: delegate alignment to the base string's UTF-16
    // view.
    return _slice._base._utf16AlignNativeIndex(validated)
  }
}
4 changes: 3 additions & 1 deletion stdlib/public/core/StringIndexValidation.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -54,6 +54,7 @@ extension _StringGuts {
return i
}

@_alwaysEmitIntoClient
internal func validateInclusiveSubscalarIndex(
_ i: String.Index,
in bounds: Range<String.Index>
Expand Down Expand Up @@ -175,6 +176,7 @@ extension _StringGuts {
/// - has an encoding that matches this string,
/// - is within the bounds of this string (including the `endIndex`), and
/// - is aligned on a scalar boundary.
@_alwaysEmitIntoClient
internal func validateInclusiveScalarIndex(
_ i: String.Index,
in bounds: Range<String.Index>
Expand Down
2 changes: 1 addition & 1 deletion stdlib/public/core/StringUTF8View.swift
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ extension String.UTF8View {
// (referring to a continuation byte) and returns `idx`. Otherwise, this will
// scalar-align the index. This is needed because we may be passed a
// non-scalar-aligned foreign index from the UTF16View.
@inline(__always)
@_alwaysEmitIntoClient @inline(__always)
internal func _utf8AlignForeignIndex(_ idx: String.Index) -> String.Index {
_internalInvariant(_guts.isForeign)
guard idx.transcodedOffset == 0 else { return idx }
Expand Down
87 changes: 87 additions & 0 deletions test/stdlib/StringIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1124,3 +1124,90 @@ suite.test("Substring.removeSubrange entire range")
expectTrue(b.isEmpty)
#endif
}

if #available(SwiftStdlib 5.8, *) {
  // Checks `String._index(roundingDown:)` against a reference result for every
  // index produced by `allIndices(includingEnd:)` on each example string.
  suite.test("String index rounding/Characters")
  .forEach(in: examples) { string in
    let end = string.endIndex
    for index in string.allIndices(includingEnd: true) {
      // Reference result: the last character index that does not exceed
      // `index`, or the end index once `index` is no longer below it.
      let expected: String.Index
      if index < end {
        expected = string.indices.lastIndex(where: { $0 <= index })!
      } else {
        expected = end
      }
      let actual = string._index(roundingDown: index)
      expectEqual(actual, expected,
        """
        index: \(index._description)
        actual: \(actual._description)
        expected: \(expected._description)
        """)
    }
  }
}

// Checks `String.UnicodeScalarView._index(roundingDown:)` against a reference
// result for every index produced by `allIndices(includingEnd:)` on each
// example string.
suite.test("String index rounding/Scalars")
.forEach(in: examples) { string in
  let scalars = string.unicodeScalars
  let end = scalars.endIndex
  for index in string.allIndices(includingEnd: true) {
    // Reference result: the last scalar index that does not exceed `index`,
    // or the end index once `index` is no longer below it.
    let expected: String.Index
    if index < end {
      expected = scalars.indices.lastIndex(where: { $0 <= index })!
    } else {
      expected = end
    }
    let actual = scalars._index(roundingDown: index)
    expectEqual(actual, expected,
      """
      index: \(index._description)
      actual: \(actual._description)
      expected: \(expected._description)
      """)
  }
}

// Checks `String.UTF16View._index(roundingDown:)` against a reference result
// for every index produced by `allIndices(includingEnd:)` on each example
// string.
suite.test("String index rounding/UTF-16")
.forEach(in: examples) { string in
  //string.dumpIndices()
  // Every index of the UTF-16 view, plus its end index.
  let utf16Indices = Set(string.utf16.indices).union([string.utf16.endIndex])
  let scalars = string.unicodeScalars

  for index in string.allIndices(includingEnd: true) {
    // An index that is already valid in the UTF-16 view is expected to come
    // back unchanged. Any other index gets rounded down to the nearest scalar
    // boundary. (Unintuitively, this is generally *not* the closest valid
    // index within the UTF-16 view.)
    let expected: String.Index = utf16Indices.contains(index)
      ? index
      : scalars.indices.lastIndex(where: { $0 <= index })!
    let actual = string.utf16._index(roundingDown: index)
    expectEqual(actual, expected,
      """
      index: \(index._description)
      actual: \(actual._description)
      expected: \(expected._description)
      """)
  }
}

// Checks `String.UTF8View._index(roundingDown:)` against a reference result
// for every index produced by `allIndices(includingEnd:)` on each example
// string.
suite.test("String index rounding/UTF-8")
.forEach(in: examples) { string in
  //string.dumpIndices()
  // Every index of the UTF-8 view, plus its end index.
  let utf8Indices = Set(string.utf8.indices).union([string.utf8.endIndex])
  let scalars = string.unicodeScalars

  for index in string.allIndices(includingEnd: true) {
    // An index that is already valid in the UTF-8 view is expected to come
    // back unchanged. Any other index gets rounded down to the nearest scalar
    // boundary. (Unintuitively, this is generally *not* the closest valid
    // index within the UTF-8 view.)
    let expected: String.Index = utf8Indices.contains(index)
      ? index
      : scalars.indices.lastIndex(where: { $0 <= index })!
    let actual = string.utf8._index(roundingDown: index)
    expectEqual(actual, expected,
      """
      index: \(index._description)
      actual: \(actual._description)
      expected: \(expected._description)
      """)
  }
}