Skip to content

Commit c25513e

Browse files
committed
Hide WordView for now (also separate Index type)
1 parent ec900f9 commit c25513e

File tree

5 files changed

+205
-287
lines changed

5 files changed

+205
-287
lines changed

stdlib/public/core/StringIndex.swift

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import SwiftShims
1717
String's Index has the following layout:
1818

1919
┌──────────┬────────────────╥────────────────┬───────╥───────┐
20-
│ b63:b16  │      b15:b14   ║     b13:b8     │ b7:b5 ║ b4:b0 │
20+
│ b63:b16  │      b15:b14   ║     b13:b8     │ b7:b4 ║ b3:b0 │
2121
├──────────┼────────────────╫────────────────┼───────╫───────┤
2222
│ position │ transc. offset ║ grapheme cache │ rsvd  ║ flags │
2323
└──────────┴────────────────╨────────────────┴───────╨───────┘
@@ -43,7 +43,7 @@ isn't frozen.
4343
looking back at scalars preceding the index. (Substrings that don't start on a
4444
`Character` boundary heavily rely on this.)
4545

46-
- reserved: 3 unused bits available for future flags etc. The meaning of each
46+
- reserved: 4 unused bits available for future flags etc. The meaning of each
4747
bit may change between stdlib versions. These must be set to zero if
4848
constructing an index in inlinable code.
4949

@@ -70,11 +70,6 @@ isn't frozen.
7070
If set, the position is known to be expressed in UTF-16 code units.
7171
(Introduced in Swift 5.7)
7272

73-
* b4: `_isWordAligned`
74-
75-
If set, the index is known to be on a Unicode word boundary.
76-
(Introduced in Swift 5.7)
77-
7873
Before Swift 5.7, bits b1, b2 and b3 used to be part of the resilient slice. See
7974
the notes on Character Alignment and Index Encoding below to see how this works.
8075

@@ -266,9 +261,6 @@ extension String.Index {
266261
@_alwaysEmitIntoClient @inline(__always) // Swift 5.7
267262
internal static var __utf16Bit: UInt64 { 0x8 }
268263

269-
@_alwaysEmitIntoClient @inline(__always) // Swift 5.7
270-
internal static var __wordAlignmentBit: UInt64 { 0x10 }
271-
272264
@_alwaysEmitIntoClient @inline(__always) // Swift 5.7
273265
internal static func __encodingBit(utf16: Bool) -> UInt64 {
274266
let utf16 = Int8(Builtin.zext_Int1_Int8(utf16._value))
@@ -373,35 +365,11 @@ extension String.Index {
373365
}
374366
}
375367

376-
// ### Word Alignment
377-
//
378-
// Enter some pretty cool information about Unicode words
379-
extension String.Index {
380-
@_alwaysEmitIntoClient // Swift 5.7
381-
@inline(__always)
382-
internal var _isWordAligned: Bool {
383-
_rawBits & Self.__wordAlignmentBit != 0
384-
}
385-
386-
@_alwaysEmitIntoClient // Swift 5.7
387-
@inline(__always)
388-
internal var _wordAligned: String.Index {
389-
let r = _rawBits
390-
| Self.__wordAlignmentBit
391-
| Self.__characterAlignmentBit
392-
| Self.__scalarAlignmentBit
393-
let idx = Self(r)
394-
idx._invariantCheck()
395-
return idx
396-
}
397-
}
398-
399368
extension String.Index {
400369
@_alwaysEmitIntoClient // Swift 5.7
401370
internal func _copyingAlignment(from index: Self) -> Self {
402371
let mask = Self.__scalarAlignmentBit
403372
| Self.__characterAlignmentBit
404-
| Self.__wordAlignmentBit
405373
return Self((_rawBits & ~mask) | (index._rawBits & mask))
406374
}
407375
}

stdlib/public/core/StringIndexValidation.swift

Lines changed: 13 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,6 @@ extension _StringGuts {
2121
internal func isFastCharacterIndex(_ i: String.Index) -> Bool {
2222
hasMatchingEncoding(i) && i._isCharacterAligned
2323
}
24-
25-
@_alwaysEmitIntoClient @inline(__always)
26-
internal func isFastWordIndex(_ i: String.Index) -> Bool {
27-
hasMatchingEncoding(i) && i._isWordAligned
28-
}
2924
}
3025

3126
// Subscalar index validation (UTF-8 & UTF-16 views)
@@ -305,47 +300,6 @@ extension _StringGuts {
305300
scalarAlign(validateInclusiveSubscalarIndex(i, in: bounds)),
306301
in: bounds)
307302
}
308-
309-
internal func validateCharacterRange(
310-
_ range: Range<String.Index>
311-
) -> Range<String.Index> {
312-
if
313-
isFastCharacterIndex(range.lowerBound),
314-
isFastCharacterIndex(range.upperBound)
315-
{
316-
_precondition(range.upperBound._encodedOffset <= count,
317-
"String index range is out of bounds")
318-
return range
319-
}
320-
321-
let r = validateSubscalarRange(range)
322-
let l = roundDownToNearestCharacter(scalarAlign(r.lowerBound))
323-
let u = roundDownToNearestCharacter(scalarAlign(r.upperBound))
324-
return Range(_uncheckedBounds: (l, u))
325-
}
326-
327-
internal func validateCharacterRange(
328-
_ range: Range<String.Index>,
329-
in bounds: Range<String.Index>
330-
) -> Range<String.Index> {
331-
_internalInvariant(bounds.upperBound <= endIndex)
332-
333-
if
334-
isFastCharacterIndex(range.lowerBound),
335-
isFastCharacterIndex(range.upperBound)
336-
{
337-
_precondition(
338-
range.lowerBound >= bounds.lowerBound
339-
&& range.upperBound <= bounds.upperBound,
340-
"String index range is out of bounds")
341-
return range
342-
}
343-
344-
let r = validateSubscalarRange(range, in: bounds)
345-
let l = roundDownToNearestCharacter(scalarAlign(r.lowerBound), in: bounds)
346-
let u = roundDownToNearestCharacter(scalarAlign(r.upperBound), in: bounds)
347-
return Range(_uncheckedBounds: (l, u))
348-
}
349303
}
350304

351305
// Temporary additions to deal with binary compatibility issues with existing
@@ -448,24 +402,24 @@ extension _StringGuts {
448402
// Word index validation (String)
449403
extension _StringGuts {
450404
@available(SwiftStdlib 5.7, *)
451-
internal func validateWordIndex(_ i: String.Index) -> String.Index {
452-
if isFastWordIndex(i) {
453-
_precondition(i._encodedOffset < count, "String index is out of bounds")
454-
return i
455-
}
405+
internal func validateWordIndex(
406+
_ i: String._WordView.Index
407+
) -> String._WordView.Index {
408+
let i = String.Index(_encodedOffset: i._encodedOffset)
456409

457-
return roundDownToNearestWord(scalarAlign(validateSubscalarIndex(i)))
410+
return roundDownToNearestWord(
411+
String._WordView.Index(scalarAlign(validateSubscalarIndex(i)))
412+
)
458413
}
459414

460415
@available(SwiftStdlib 5.7, *)
461-
internal func validateInclusiveWordIndex(_ i: String.Index) -> String.Index {
462-
if isFastWordIndex(i) {
463-
_precondition(i._encodedOffset < count, "String index is out of bounds")
464-
return i
465-
}
416+
internal func validateInclusiveWordIndex(
417+
_ i: String._WordView.Index
418+
) -> String._WordView.Index {
419+
let i = String.Index(_encodedOffset: i._encodedOffset)
466420

467-
return roundDownToNearestCharacter(
468-
scalarAlign(validateInclusiveSubscalarIndex(i))
421+
return roundDownToNearestWord(
422+
String._WordView.Index(scalarAlign(validateInclusiveSubscalarIndex(i)))
469423
)
470424
}
471425
}

stdlib/public/core/StringWordBreaking.swift

Lines changed: 101 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,42 +11,119 @@
1111

1212
extension _StringGuts {
1313
@inline(__always)
14-
@available(SwiftStdlib 5.7, *)
15-
internal func roundDownToNearestWord(_ i: String.Index) -> String.Index {
16-
_internalInvariant(i._isScalarAligned)
17-
_internalInvariant(hasMatchingEncoding(i))
14+
internal func roundDownToNearestWord(
15+
_ i: String._WordView.Index
16+
) -> String._WordView.Index {
1817
_internalInvariant(i._encodedOffset <= count)
1918

20-
if _fastPath(i._isWordAligned) {
21-
return i
22-
}
23-
2419
let offset = i._encodedOffset
2520

2621
if offset == 0 || offset == count {
27-
return i._wordAligned
22+
return i
2823
}
2924

3025
return _slowRoundDownToNearestWord(i)
3126
}
3227

3328
@inline(never)
34-
@available(SwiftStdlib 5.7, *)
35-
internal func _slowRoundDownToNearestWord(_ i: String.Index) -> String.Index {
36-
let words = String._WordView(self)
37-
29+
internal func _slowRoundDownToNearestWord(
30+
_ i: String._WordView.Index
31+
) -> String._WordView.Index {
3832
let offset = i._encodedOffset
39-
let start = offset &- words._uncheckedIndex(before: i)._encodedOffset
40-
let startIndex = String.Index(_encodedOffset: start)._wordAligned
41-
let stride = words._uncheckedIndex(after: startIndex)._encodedOffset
33+
let start = _opaquePreviousWordIndex(endingAt: offset)
34+
let stride = _opaqueNextWordIndex(startingAt: start) &- start
4235
_internalInvariant(offset <= start + stride, "Word breaking inconsistency")
4336

4437
if offset >= start + stride {
45-
return i._wordAligned
38+
return i
4639
}
4740

48-
let r = String.Index(_encodedOffset: start)._wordAligned
49-
return markEncoding(r)
41+
return String._WordView.Index(_encodedOffset: start)
42+
}
43+
44+
@inline(never)
45+
@_effects(releasenone)
46+
internal func _opaqueNextWordIndex(startingAt i: Int) -> Int {
47+
if _slowPath(isForeign) {
48+
return _foreignOpaqueNextWordIndex(startingAt: i)
49+
}
50+
51+
return withFastUTF8 { utf8 in
52+
nextWordBoundary(startingAt: i) {
53+
_internalInvariant($0 >= 0)
54+
55+
guard $0 < utf8.count else {
56+
return nil
57+
}
58+
59+
let (scalar, len) = _decodeScalar(utf8, startingAt: $0)
60+
return (scalar, $0 &+ len)
61+
}
62+
}
63+
}
64+
65+
internal func _foreignOpaqueNextWordIndex(startingAt i: Int) -> Int {
66+
#if _runtime(_ObjC)
67+
return nextWordBoundary(startingAt: i) {
68+
_internalInvariant($0 >= 0)
69+
70+
guard $0 < count else {
71+
return nil
72+
}
73+
74+
let scalars = String.UnicodeScalarView(self)
75+
let idx = String.Index(_encodedOffset: $0)
76+
77+
let scalar = scalars[idx]
78+
let nextIndex = scalars.index(after: idx)
79+
80+
return (scalar, nextIndex._encodedOffset)
81+
}
82+
#else
83+
fatalError("No foreign strings on this platform in this version of Swift.")
84+
#endif
85+
}
86+
87+
internal func _opaquePreviousWordIndex(endingAt i: Int) -> Int {
88+
if _slowPath(isForeign) {
89+
return _foreignOpaquePreviousWordIndex(endingAt: i)
90+
}
91+
92+
return withFastUTF8 { utf8 in
93+
previousWordBoundary(endingAt: i) {
94+
_internalInvariant($0 <= count)
95+
96+
guard $0 > 0 else {
97+
return nil
98+
}
99+
100+
let (scalar, len) = _decodeScalar(utf8, endingAt: $0)
101+
return (scalar, $0 &- len)
102+
}
103+
}
104+
}
105+
106+
@inline(never)
107+
internal func _foreignOpaquePreviousWordIndex(endingAt i: Int) -> Int {
108+
#if _runtime(_ObjC)
109+
return previousWordBoundary(endingAt: i) {
110+
_internalInvariant($0 <= count)
111+
112+
guard $0 > 0 else {
113+
return nil
114+
}
115+
116+
let scalars = String.UnicodeScalarView(self)
117+
let idx = String.Index(_encodedOffset: $0)
118+
119+
let previousIndex = scalars.index(before: idx)
120+
let scalar = scalars[previousIndex]
121+
122+
return (scalar, previousIndex._encodedOffset)
123+
}
124+
#else
125+
fatalError("No foreign strings on this platform in this version of Swift.")
126+
#endif
50127
}
51128
}
52129

@@ -77,10 +154,9 @@ internal struct _WordBreakingState {
77154
var shouldBreakRI = false
78155
}
79156

80-
@available(SwiftStdlib 5.7, *)
81-
extension String._WordView {
157+
extension _StringGuts {
82158
// Returns the stride of the next word at the previous boundary offset.
83-
internal func nextBoundary(
159+
internal func nextWordBoundary(
84160
startingAt index: Int,
85161
nextScalar: (Int) -> (scalar: Unicode.Scalar, end: Int)?
86162
) -> Int {
@@ -109,7 +185,7 @@ extension String._WordView {
109185
}
110186

111187
// Returns the stride of the previous word at the current boundary offset.
112-
internal func previousBoundary(
188+
internal func previousWordBoundary(
113189
endingAt index: Int,
114190
previousScalar: (Int) -> (scalar: Unicode.Scalar, start: Int)?
115191
) -> Int {
@@ -145,8 +221,7 @@ extension String._WordView {
145221
}
146222
}
147223

148-
@available(SwiftStdlib 5.7, *)
149-
extension String._WordView {
224+
extension _StringGuts {
150225
// The "algorithm" that determines whether or not we should break between
151226
// certain word break properties.
152227
//
@@ -345,8 +420,7 @@ extension String._WordView {
345420
}
346421
}
347422

348-
@available(SwiftStdlib 5.7, *)
349-
extension String._WordView {
423+
extension _StringGuts {
350424
// The "algorithm" that determines whether or not we should break between
351425
// certain word break properties.
352426
//

0 commit comments

Comments
 (0)