Skip to content

[String] Switch scalar-aligned bit to a reserved bit. #25948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions stdlib/public/core/StringCharacterView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ extension String: BidirectionalCollection {
let stride = _characterStride(startingAt: i)
let nextOffset = i._encodedOffset &+ stride
let nextStride = _characterStride(
startingAt: Index(_encodedOffset: nextOffset)._aligned)
startingAt: Index(_encodedOffset: nextOffset)._scalarAligned)

return Index(
encodedOffset: nextOffset, characterStride: nextStride)._aligned
encodedOffset: nextOffset, characterStride: nextStride)._scalarAligned
}

/// Returns the position immediately before the given index.
Expand All @@ -82,7 +82,8 @@ extension String: BidirectionalCollection {
let i = _guts.scalarAlign(i)
let stride = _characterStride(endingAt: i)
let priorOffset = i._encodedOffset &- stride
return Index(encodedOffset: priorOffset, characterStride: stride)._aligned
return Index(
encodedOffset: priorOffset, characterStride: stride)._scalarAligned
}
/// Returns an index that is the specified distance from the given index.
///
Expand Down Expand Up @@ -200,7 +201,7 @@ extension String: BidirectionalCollection {

@inlinable @inline(__always)
internal func _characterStride(startingAt i: Index) -> Int {
_internalInvariant(i._isAligned)
_internalInvariant(i._isScalarAligned)

// Fast check if it's already been measured, otherwise check resiliently
if let d = i.characterStride { return d }
Expand All @@ -212,7 +213,7 @@ extension String: BidirectionalCollection {

@inlinable @inline(__always)
internal func _characterStride(endingAt i: Index) -> Int {
_internalInvariant(i._isAligned)
_internalInvariant(i._isScalarAligned)

if i == startIndex { return 0 }

Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringGuts.swift
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,11 @@ extension _StringGuts {

@inlinable @inline(__always)
internal var startIndex: String.Index {
return Index(_encodedOffset: 0)._aligned
return Index(_encodedOffset: 0)._scalarAligned
}
@inlinable @inline(__always)
internal var endIndex: String.Index {
return Index(_encodedOffset: self.count)._aligned
return Index(_encodedOffset: self.count)._scalarAligned
}
}

Expand Down
101 changes: 56 additions & 45 deletions stdlib/public/core/StringIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,36 @@ import SwiftShims

String's Index has the following layout:

┌──────────┬───────────────────┬─────────╥────────────────┬──────────┐
│ b63:b16 │ b15:b14 │ b13 ║ b12:b8 │ b6:b0 │
├──────────┼───────────────────┼─────────╫────────────────┼──────────┤
│ position │ transcoded offset │ aligned ║ grapheme cache │ reserved │
└──────────┴───────────────────┴─────────╨────────────────┴──────────┘

Position, transcoded offset, and aligned are fully exposed in the ABI. Grapheme
cache and reserved are partially resilient: the fact that there are 13 bits with
a default value of `0` is ABI, but not the layout, construction, or
┌──────────┬───────────────────╥────────────────┬──────────╥────────────────┐
│ b63:b16 │ b15:b14 ║ b13:b8 │ b7:b1 ║ b0 │
├──────────┼───────────────────╫────────────────┼──────────╫────────────────┤
│ position │ transcoded offset ║ grapheme cache │ reserved ║ scalar aligned │
└──────────┴───────────────────╨────────────────┴──────────╨────────────────┘
└──────── resilient ────────┘

Position, transcoded offset, and scalar aligned are fully exposed in the ABI.
Grapheme cache and reserved are partially resilient: the fact that there are 13
bits with a default value of `0` is ABI, but not the layout, construction, or
interpretation of those bits. All use of grapheme cache should be behind
non-inlinable function calls.
non-inlinable function calls. Inlinable code should not set a non-zero value to
grapheme cache bits: doing so breaks back deployment as they will be interpreted
as a set cache.

- position aka `encodedOffset`: A 48-bit offset into the string's code units

- transcoded offset: a 2-bit sub-scalar offset, derived from transcoding
- aligned, whether this index is known to be scalar-aligned (see below)

<resilience barrier>

- grapheme cache: A 6-bit value remembering the distance to the next grapheme
boundary.

- reserved: 7 bits reserved for future use.

<resilience barrier>
- grapheme cache: A 5-bit value remembering the distance to the next grapheme
boundary
- reserved: 8-bit for future use.

- scalar aligned: whether this index is known to be scalar-aligned (see below)


*/
extension String {
Expand Down Expand Up @@ -86,7 +97,7 @@ extension String.Index {

@usableFromInline
internal var characterStride: Int? {
let value = (_rawBits & 0x1F00) &>> 8
let value = (_rawBits & 0x3F00) &>> 8
return value > 0 ? Int(truncatingIfNeeded: value) : nil
}

Expand Down Expand Up @@ -136,7 +147,7 @@ extension String.Index {
encodedOffset: Int, transcodedOffset: Int, characterStride: Int
) {
self.init(encodedOffset: encodedOffset, transcodedOffset: transcodedOffset)
if _slowPath(characterStride > 0x1F) { return }
if _slowPath(characterStride > 0x3F) { return }
self._rawBits |= UInt64(truncatingIfNeeded: characterStride &<< 8)
self._invariantCheck()
}
Expand All @@ -152,7 +163,7 @@ extension String.Index {
@usableFromInline @inline(never) @_effects(releasenone)
internal func _invariantCheck() {
_internalInvariant(_encodedOffset >= 0)
if self._isAligned {
if self._isScalarAligned {
_internalInvariant(transcodedOffset == 0)
}
}
Expand Down Expand Up @@ -209,35 +220,35 @@ extension String.Index {
}

/*
Index Alignment
Index Scalar Alignment

SE-0180 unifies the Index type of String and all its views and allows
non-scalar-aligned indices to be used across views. In order to guarantee
behavior, we often have to check and perform scalar alignment. To speed up
these checks, we allocate a bit denoting known-to-be-aligned, so that the
alignment check can skip the load. The below shows what views need to check
for alignment before they can operate, and whether the indices they produce
are aligned.

┌───────────────╥────────────────────┬──────────────────────────┐
│ View ║ Requires Alignment │ Produces Aligned Indices
╞═══════════════╬════════════════════╪══════════════════════════╡
│ Native UTF8 ║ no │ no
├───────────────╫────────────────────┼──────────────────────────┤
│ Native UTF16 ║ yes │ no
╞═══════════════╬════════════════════╪══════════════════════════╡
│ Foreign UTF8 ║ yes │ no
├───────────────╫────────────────────┼──────────────────────────┤
│ Foreign UTF16 ║ no │ no
╞═══════════════╬════════════════════╪══════════════════════════╡
│ UnicodeScalar ║ yes │ yes
├───────────────╫────────────────────┼──────────────────────────┤
│ Character ║ yes │ yes
└───────────────╨────────────────────┴──────────────────────────┘

The "requires alignment" applies to any operation taking a String.Index that's
not defined entirely in terms of other operations taking a String.Index. These
include:
these checks, we allocate a bit denoting known-to-be-scalar-aligned, so that
the alignment check can skip the load. The below shows what views need to
check for alignment before they can operate, and whether the indices they
produce are aligned.

┌───────────────╥───────────────────────────┬─────────────────────────┐
│ View ║ Requires Scalar Alignment │ Produces Scalar Aligned │
╞═══════════════╬═══════════════════════════╪═════════════════════════╡
│ Native UTF8 ║ no │ no │
├───────────────╫───────────────────────────┼─────────────────────────┤
│ Native UTF16 ║ yes │ no │
╞═══════════════╬═══════════════════════════╪═════════════════════════╡
│ Foreign UTF8 ║ yes │ no │
├───────────────╫───────────────────────────┼─────────────────────────┤
│ Foreign UTF16 ║ no │ no │
╞═══════════════╬═══════════════════════════╪═════════════════════════╡
│ UnicodeScalar ║ yes │ yes │
├───────────────╫───────────────────────────┼─────────────────────────┤
│ Character ║ yes │ yes │
└───────────────╨───────────────────────────┴─────────────────────────┘

The "requires scalar alignment" applies to any operation taking a String.Index
that's not defined entirely in terms of other operations taking a
String.Index. These include:

* index(after:)
* index(before:)
Expand All @@ -249,13 +260,13 @@ extension String.Index {
extension String.Index {
@_alwaysEmitIntoClient // Swift 5.1
@inline(__always)
internal var _isAligned: Bool { return 0 != _rawBits & 0x2000 }
internal var _isScalarAligned: Bool { return 0 != _rawBits & 0x1 }

@_alwaysEmitIntoClient // Swift 5.1
@inline(__always)
internal var _aligned: String.Index {
internal var _scalarAligned: String.Index {
var idx = self
idx._rawBits |= 0x2000
idx._rawBits |= 0x1
idx._invariantCheck()
return idx
}
Expand Down
6 changes: 3 additions & 3 deletions stdlib/public/core/StringUTF16View.swift
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ extension String.UTF16View: BidirectionalCollection {
if len == 4 && idx.transcodedOffset == 0 {
return idx.nextTranscoded
}
return idx.strippingTranscoding.encoded(offsetBy: len)._aligned
return idx.strippingTranscoding.encoded(offsetBy: len)._scalarAligned
}

@inlinable @inline(__always)
Expand All @@ -178,7 +178,7 @@ extension String.UTF16View: BidirectionalCollection {

// Single UTF-16 code unit
_internalInvariant((1...3) ~= len)
return idx.encoded(offsetBy: -len)._aligned
return idx.encoded(offsetBy: -len)._scalarAligned
}

public func index(_ i: Index, offsetBy n: Int) -> Index {
Expand Down Expand Up @@ -587,7 +587,7 @@ extension String.UTF16View {
_internalInvariant(utf16Len == 2)
return Index(encodedOffset: readIdx, transcodedOffset: 1)
}
return Index(_encodedOffset: readIdx &+ len)._aligned
return Index(_encodedOffset: readIdx &+ len)._scalarAligned
}

readIdx &+= len
Expand Down
8 changes: 4 additions & 4 deletions stdlib/public/core/StringUnicodeScalarView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {

if _fastPath(_guts.isFastUTF8) {
let len = _guts.fastUTF8ScalarLength(startingAt: i._encodedOffset)
return i.encoded(offsetBy: len)._aligned
return i.encoded(offsetBy: len)._scalarAligned
}

return _foreignIndex(after: i)
Expand All @@ -137,7 +137,7 @@ extension String.UnicodeScalarView: BidirectionalCollection {
return _utf8ScalarLength(utf8, endingAt: i._encodedOffset)
}
_internalInvariant(len <= 4, "invalid UTF8")
return i.encoded(offsetBy: -len)._aligned
return i.encoded(offsetBy: -len)._scalarAligned
}

return _foreignIndex(before: i)
Expand Down Expand Up @@ -419,7 +419,7 @@ extension String.UnicodeScalarView {
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: len)._aligned
return i.encoded(offsetBy: len)._scalarAligned
}

@usableFromInline @inline(never)
Expand All @@ -430,6 +430,6 @@ extension String.UnicodeScalarView {
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1

return i.encoded(offsetBy: -len)._aligned
return i.encoded(offsetBy: -len)._scalarAligned
}
}
18 changes: 9 additions & 9 deletions stdlib/public/core/UnicodeHelpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ extension _StringGuts {
@inline(__always) // fast-path: fold common fastUTF8 check
internal func scalarAlign(_ idx: Index) -> Index {
let result: String.Index
if _fastPath(idx._isAligned) {
if _fastPath(idx._isScalarAligned) {
result = idx
} else {
// TODO(String performance): isASCII check
Expand All @@ -172,28 +172,28 @@ extension _StringGuts {

_internalInvariant(isOnUnicodeScalarBoundary(result),
"Alignment bit is set for non-aligned index")
_internalInvariant(result._isAligned)
_internalInvariant(result._isScalarAligned)
return result
}

@inline(never) // slow-path
@_alwaysEmitIntoClient // Swift 5.1
internal func scalarAlignSlow(_ idx: Index) -> Index {
_internalInvariant(!idx._isAligned)
_internalInvariant(!idx._isScalarAligned)

if _slowPath(idx.transcodedOffset != 0 || idx._encodedOffset == 0) {
// Transcoded index offsets are already scalar aligned
return String.Index(_encodedOffset: idx._encodedOffset)._aligned
return String.Index(_encodedOffset: idx._encodedOffset)._scalarAligned
}
if _slowPath(self.isForeign) {
let foreignIdx = foreignScalarAlign(idx)
_internalInvariant(foreignIdx._isAligned)
_internalInvariant(foreignIdx._isScalarAligned)
return foreignIdx
}

return String.Index(_encodedOffset:
self.withFastUTF8 { _scalarAlign($0, idx._encodedOffset) }
)._aligned
)._scalarAligned
}

@inlinable
Expand Down Expand Up @@ -359,17 +359,17 @@ extension _StringGuts {
@usableFromInline @inline(never) // slow-path
@_effects(releasenone)
internal func foreignScalarAlign(_ idx: Index) -> Index {
guard idx._encodedOffset != self.count else { return idx._aligned }
guard idx._encodedOffset != self.count else { return idx._scalarAligned }

_internalInvariant(idx._encodedOffset < self.count)

let ecCU = foreignErrorCorrectedUTF16CodeUnit(at: idx)
if _fastPath(!UTF16.isTrailSurrogate(ecCU)) {
return idx._aligned
return idx._scalarAligned
}
_internalInvariant(idx._encodedOffset > 0,
"Error-correction shouldn't give trailing surrogate at position zero")
return String.Index(_encodedOffset: idx._encodedOffset &- 1)._aligned
return String.Index(_encodedOffset: idx._encodedOffset &- 1)._scalarAligned
}

@usableFromInline @inline(never)
Expand Down
1 change: 0 additions & 1 deletion stdlib/public/core/UnicodeScalar.swift
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ extension Unicode.Scalar.UTF16View : RandomAccessCollection {
/// `endIndex` property.
@inlinable
public subscript(position: Int) -> UTF16.CodeUnit {
_internalInvariant((0..<self.count).contains(position))
if position == 1 { return UTF16.trailSurrogate(value) }
if endIndex == 1 { return UTF16.CodeUnit(value.value) }
return UTF16.leadSurrogate(value)
Expand Down