Skip to content

Commit d358ece

Browse files
authored
Merge pull request #62798 from lorentey/string-index-rounding
[stdlib] Expose index rounding entry points
2 parents 4ffc5fe + f8b997b commit d358ece

File tree

4 files changed

+273
-3
lines changed

4 files changed

+273
-3
lines changed

stdlib/public/core/StringIndexConversions.swift

Lines changed: 182 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -177,3 +177,184 @@ extension String.Index {
177177
}
178178
}
179179

180+
extension String {
181+
/// Returns the largest valid index in `self` that does not exceed the given
182+
/// position.
183+
///
184+
/// let cafe = "Cafe\u{301}" // "Café"
185+
/// let accent = cafe.unicodeScalars.firstIndex(of: "\u{301}")!
186+
/// let char = cafe._index(roundingDown: accent)
187+
/// print(cafe[char]) // "é"
188+
///
189+
/// `String` methods such as `index(after:)` and `distance(from:to:)`
190+
/// implicitly round their input indices down to the nearest valid index:
191+
///
192+
/// let i = cafe.index(before: char)
193+
/// let j = cafe.index(before: accent)
194+
/// print(cafe[i], cafe[j]) // "f f"
195+
/// print(i == j) // true
196+
///
197+
/// This operation lets you perform this rounding yourself. For example, this
198+
/// can be used to safely check if `index(before:)` would consider some
199+
/// arbitrary index equivalent to the start index before calling it.
200+
///
201+
/// - Parameter i: An index that is valid in at least one view of this string.
202+
/// - Returns: The largest valid index within this string that doesn't exceed
203+
/// `i`.
204+
@available(SwiftStdlib 5.8, *)
205+
public // SPI(Foundation) FIXME: This should be API
206+
func _index(roundingDown i: Index) -> Index {
207+
_guts.validateInclusiveCharacterIndex(i)
208+
}
209+
}
210+
211+
extension Substring {
212+
/// Returns the largest valid index in `self` that does not exceed the given
213+
/// position.
214+
///
215+
/// `Substring` methods such as `index(after:)` and `distance(from:to:)`
216+
/// implicitly round their input indices down to the nearest valid index.
217+
/// This operation lets you perform this rounding yourself. For example, this
218+
/// can be used to safely check if `index(before:)` would consider some
219+
/// arbitrary index equivalent to the start index before calling it.
220+
///
221+
/// - Parameter i: An index that is valid in at least one view of this
222+
/// substring.
223+
/// - Returns: The largest valid index within this substring that doesn't
224+
/// exceed `i`.
225+
@available(SwiftStdlib 5.8, *)
226+
public // SPI(Foundation) FIXME: This should be API
227+
func _index(roundingDown i: Index) -> Index {
228+
_wholeGuts.validateInclusiveCharacterIndex(i, in: _bounds)
229+
}
230+
}
231+
232+
extension String.UnicodeScalarView {
233+
/// Returns the largest valid index in `self` that does not exceed the given
234+
/// position.
235+
///
236+
/// Methods such as `index(after:)` and `distance(from:to:)` implicitly round
237+
/// their input indices down to the nearest valid index. This operation lets
238+
/// you perform this rounding yourself. For example, this can be used to
239+
/// safely check if `index(before:)` would consider some arbitrary index
240+
/// equivalent to the start index before calling it.
241+
///
242+
/// - Parameter i: An index that is valid in at least one view of the string
243+
/// shared by this view.
244+
/// - Returns: The largest valid index within this view that doesn't exceed
245+
/// `i`.
246+
@_alwaysEmitIntoClient
247+
public // SPI(Foundation) FIXME: This should be API
248+
func _index(roundingDown i: Index) -> Index {
249+
_guts.validateInclusiveScalarIndex(i)
250+
}
251+
}
252+
253+
extension Substring.UnicodeScalarView {
254+
/// Returns the largest valid index in `self` that does not exceed the given
255+
/// position.
256+
///
257+
/// Methods such as `index(after:)` and `distance(from:to:)` implicitly round
258+
/// their input indices down to the nearest valid index. This operation lets
259+
/// you perform this rounding yourself. For example, this can be used to
260+
/// safely check if `index(before:)` would consider some arbitrary index
261+
/// equivalent to the start index before calling it.
262+
///
263+
/// - Parameter i: An index that is valid in at least one view of the
264+
/// substring shared by this view.
265+
/// - Returns: The largest valid index within this view that doesn't exceed
266+
/// `i`.
267+
@_alwaysEmitIntoClient
268+
public // SPI(Foundation) FIXME: This should be API
269+
func _index(roundingDown i: Index) -> Index {
270+
_wholeGuts.validateInclusiveScalarIndex(i, in: _bounds)
271+
}
272+
}
273+
274+
extension String.UTF8View {
275+
/// Returns the largest valid index in `self` that does not exceed the given
276+
/// position.
277+
///
278+
/// Methods such as `index(after:)` and `distance(from:to:)` implicitly round
279+
/// their input indices down to the nearest valid index. This operation lets
280+
/// you perform this rounding yourself. For example, this can be used to
281+
/// safely check if `index(before:)` would consider some arbitrary index
282+
/// equivalent to the start index before calling it.
283+
///
284+
/// - Parameter i: An index that is valid in at least one view of the
285+
/// string shared by this view.
286+
/// - Returns: The largest valid index within this view that doesn't exceed
287+
/// `i`.
288+
@_alwaysEmitIntoClient
289+
public // SPI(Foundation) FIXME: This should be API
290+
func _index(roundingDown i: Index) -> Index {
291+
let i = _guts.validateInclusiveSubscalarIndex(i)
292+
guard _guts.isForeign else { return i.strippingTranscoding._knownUTF8 }
293+
return _utf8AlignForeignIndex(i)
294+
}
295+
}
296+
297+
extension Substring.UTF8View {
298+
/// Returns the largest valid index in `self` that does not exceed the given
299+
/// position.
300+
///
301+
/// Methods such as `index(after:)` and `distance(from:to:)` implicitly round
302+
/// their input indices down to the nearest valid index. This operation lets
303+
/// you perform this rounding yourself. For example, this can be used to
304+
/// safely check if `index(before:)` would consider some arbitrary index
305+
/// equivalent to the start index before calling it.
306+
///
307+
/// - Parameter i: An index that is valid in at least one view of the
308+
/// substring shared by this view.
309+
/// - Returns: The largest valid index within this view that doesn't exceed
310+
/// `i`.
311+
@_alwaysEmitIntoClient
312+
public // SPI(Foundation) FIXME: This should be API
313+
func _index(roundingDown i: Index) -> Index {
314+
let i = _wholeGuts.validateInclusiveSubscalarIndex(i, in: _bounds)
315+
guard _wholeGuts.isForeign else { return i.strippingTranscoding._knownUTF8 }
316+
return _slice._base._utf8AlignForeignIndex(i)
317+
}
318+
}
319+
320+
extension String.UTF16View {
321+
/// Returns the valid index in `self` that this view considers equivalent to
322+
/// the given index.
323+
///
324+
/// Indices in the UTF-8 view that address positions between Unicode scalars
325+
/// are rounded down to the nearest scalar boundary; other indices are left as
326+
/// is.
327+
///
328+
/// - Parameter i: An index that is valid in at least one view of the
329+
/// string shared by this view.
330+
/// - Returns: The valid index in `self` that this view considers equivalent
331+
/// to `i`.
332+
@_alwaysEmitIntoClient
333+
public // SPI(Foundation) FIXME: This should be API
334+
func _index(roundingDown i: Index) -> Index {
335+
let i = _guts.validateInclusiveSubscalarIndex(i)
336+
if _guts.isForeign { return i.strippingTranscoding._knownUTF16 }
337+
return _utf16AlignNativeIndex(i)
338+
}
339+
}
340+
341+
extension Substring.UTF16View {
342+
/// Returns the valid index in `self` that this view considers equivalent to
343+
/// the given index.
344+
///
345+
/// Indices in the UTF-8 view that address positions between Unicode scalars
346+
/// are rounded down to the nearest scalar boundary; other indices are left as
347+
/// is.
348+
///
349+
/// - Parameter i: An index that is valid in at least one view of the
350+
/// substring shared by this view.
351+
/// - Returns: The valid index in `self` that this view considers equivalent
352+
/// to `i`.
353+
@_alwaysEmitIntoClient
354+
public // SPI(Foundation) FIXME: This should be API
355+
func _index(roundingDown i: Index) -> Index {
356+
let i = _wholeGuts.validateInclusiveSubscalarIndex(i, in: _bounds)
357+
if _wholeGuts.isForeign { return i.strippingTranscoding._knownUTF16 }
358+
return _slice._base._utf16AlignNativeIndex(i)
359+
}
360+
}

stdlib/public/core/StringIndexValidation.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -54,6 +54,7 @@ extension _StringGuts {
5454
return i
5555
}
5656

57+
@_alwaysEmitIntoClient
5758
internal func validateInclusiveSubscalarIndex(
5859
_ i: String.Index,
5960
in bounds: Range<String.Index>
@@ -175,6 +176,7 @@ extension _StringGuts {
175176
/// - has an encoding that matches this string,
176177
/// - is within the bounds of this string (including the `endIndex`), and
177178
/// - is aligned on a scalar boundary.
179+
@_alwaysEmitIntoClient
178180
internal func validateInclusiveScalarIndex(
179181
_ i: String.Index,
180182
in bounds: Range<String.Index>

stdlib/public/core/StringUTF8View.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ extension String.UTF8View {
443443
// (referring to a continuation byte) and returns `idx`. Otherwise, this will
444444
// scalar-align the index. This is needed because we may be passed a
445445
// non-scalar-aligned foreign index from the UTF16View.
446-
@inline(__always)
446+
@_alwaysEmitIntoClient @inline(__always)
447447
internal func _utf8AlignForeignIndex(_ idx: String.Index) -> String.Index {
448448
_internalInvariant(_guts.isForeign)
449449
guard idx.transcodedOffset == 0 else { return idx }

test/stdlib/StringIndex.swift

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,3 +1122,90 @@ suite.test("Substring.removeSubrange entire range") {
11221122
expectTrue(b.isEmpty)
11231123
#endif
11241124
}
1125+
1126+
if #available(SwiftStdlib 5.8, *) {
1127+
suite.test("String index rounding/Characters")
1128+
.forEach(in: examples) { string in
1129+
for index in string.allIndices(includingEnd: true) {
1130+
let end = string.endIndex
1131+
let expected = (index < end
1132+
? string.indices.lastIndex { $0 <= index }!
1133+
: end)
1134+
let actual = string._index(roundingDown: index)
1135+
expectEqual(actual, expected,
1136+
"""
1137+
index: \(index._description)
1138+
actual: \(actual._description)
1139+
expected: \(expected._description)
1140+
""")
1141+
}
1142+
}
1143+
}
1144+
1145+
suite.test("String index rounding/Scalars")
1146+
.forEach(in: examples) { string in
1147+
for index in string.allIndices(includingEnd: true) {
1148+
let end = string.unicodeScalars.endIndex
1149+
let expected = (index < end
1150+
? string.unicodeScalars.indices.lastIndex { $0 <= index }!
1151+
: end)
1152+
let actual = string.unicodeScalars._index(roundingDown: index)
1153+
expectEqual(actual, expected,
1154+
"""
1155+
index: \(index._description)
1156+
actual: \(actual._description)
1157+
expected: \(expected._description)
1158+
""")
1159+
}
1160+
}
1161+
1162+
suite.test("String index rounding/UTF-16")
1163+
.forEach(in: examples) { string in
1164+
//string.dumpIndices()
1165+
var utf16Indices = Set(string.utf16.indices)
1166+
utf16Indices.insert(string.utf16.endIndex)
1167+
1168+
for index in string.allIndices(includingEnd: true) {
1169+
let expected: String.Index
1170+
if utf16Indices.contains(index) {
1171+
expected = index
1172+
} else {
1173+
// If the index isn't valid in the UTF-16 view, it gets rounded down
1174+
// to the nearest scalar boundary. (Unintuitively, this is generally *not*
1175+
// the closest valid index within the UTF-16 view.)
1176+
expected = string.unicodeScalars.indices.lastIndex { $0 <= index }!
1177+
}
1178+
let actual = string.utf16._index(roundingDown: index)
1179+
expectEqual(actual, expected,
1180+
"""
1181+
index: \(index._description)
1182+
actual: \(actual._description)
1183+
expected: \(expected._description)
1184+
""")
1185+
}
1186+
}
1187+
1188+
suite.test("String index rounding/UTF-8")
1189+
.forEach(in: examples) { string in
1190+
//string.dumpIndices()
1191+
var utf8Indices = Set(string.utf8.indices)
1192+
utf8Indices.insert(string.utf8.endIndex)
1193+
for index in string.allIndices(includingEnd: true) {
1194+
let expected: String.Index
1195+
if utf8Indices.contains(index) {
1196+
expected = index
1197+
} else {
1198+
// If the index isn't valid in the UTF-8 view, it gets rounded down
1199+
// to the nearest scalar boundary. (Unintuitively, this is generally *not*
1200+
// the closest valid index within the UTF-8 view.)
1201+
expected = string.unicodeScalars.indices.lastIndex { $0 <= index }!
1202+
}
1203+
let actual = string.utf8._index(roundingDown: index)
1204+
expectEqual(actual, expected,
1205+
"""
1206+
index: \(index._description)
1207+
actual: \(actual._description)
1208+
expected: \(expected._description)
1209+
""")
1210+
}
1211+
}

0 commit comments

Comments
 (0)