Skip to content

Commit edd13ec

Browse files
authored
Merge pull request #9188 from apple/new-unicode-decoders
2 parents 9e12b95 + d8f1caf commit edd13ec

File tree

9 files changed

+113
-207
lines changed

9 files changed

+113
-207
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ set(SWIFTLIB_ESSENTIAL
132132
SwiftNativeNSArray.swift
133133
UIntBuffer.swift
134134
UnavailableStringAPIs.swift.gyb
135-
Unicode.swift
136135
UnicodeEncoding.swift
137136
UnicodeParser.swift
138137
UnicodeScalar.swift
@@ -146,6 +145,7 @@ set(SWIFTLIB_ESSENTIAL
146145
UTFEncoding.swift
147146
UTF8.swift
148147
UTF16.swift
148+
Unicode.swift # ORDER DEPENDENCY: must follow new unicode support
149149
WriteBackMutableSlice.swift
150150
)
151151

stdlib/public/core/Collection.swift

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ public struct IndexingIterator<
379379
> : IteratorProtocol, Sequence {
380380

381381
@_inlineable
382+
@inline(__always)
382383
/// Creates an iterator over the given collection.
383384
public /// @testable
384385
init(_elements: Elements) {
@@ -387,6 +388,7 @@ public struct IndexingIterator<
387388
}
388389

389390
@_inlineable
391+
@inline(__always)
390392
/// Creates an iterator over the given collection.
391393
public /// @testable
392394
init(_elements: Elements, _position: Elements.Index) {
@@ -419,12 +421,14 @@ public struct IndexingIterator<
419421
/// - Returns: The next element in the underlying sequence if a next element
420422
/// exists; otherwise, `nil`.
421423
@_inlineable
424+
@inline(__always)
422425
public mutating func next() -> Elements._Element? {
423426
if _position == _elements.endIndex { return nil }
424427
let element = _elements[_position]
425428
_elements.formIndex(after: &_position)
426429
return element
427430
}
431+
428432
@_versioned
429433
internal let _elements: Elements
430434
@_versioned
@@ -1326,12 +1330,15 @@ extension Collection {
13261330
/// // Prints "10"
13271331
@_inlineable
13281332
public var first: Iterator.Element? {
1329-
// NB: Accessing `startIndex` may not be O(1) for some lazy collections,
1330-
// so instead of testing `isEmpty` and then returning the first element,
1331-
// we'll just rely on the fact that the iterator always yields the
1332-
// first element first.
1333-
var i = makeIterator()
1334-
return i.next()
1333+
@inline(__always)
1334+
get {
1335+
// NB: Accessing `startIndex` may not be O(1) for some lazy collections,
1336+
// so instead of testing `isEmpty` and then returning the first element,
1337+
// we'll just rely on the fact that the iterator always yields the
1338+
// first element first.
1339+
var i = makeIterator()
1340+
return i.next()
1341+
}
13351342
}
13361343

13371344
// TODO: swift-3-indexing-model - uncomment and replace the above when ready (or should we still use the iterator one?)

stdlib/public/core/IntegerParsing.swift.gyb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
2828
/// - Note: If `text` begins with `"+"` or `"-"`, even if the rest of
2929
/// the characters are `"0"`, the result is `nil`.
3030
// FIXME(integers): support a more general BinaryInteger protocol
31+
@inline(__always)
3132
internal func _parseUnsignedAsciiAsUInt64(
3233
_ u16: String.UTF16View, _ radix: Int, _ maximum: UInt64
3334
) -> UInt64? {
@@ -70,6 +71,7 @@ internal func _parseUnsignedAsciiAsUInt64(
7071
/// - Note: For text matching the regular expression "-0+", the result
7172
/// is `0`, not `nil`.
7273
// FIXME(integers): support a more general BinaryInteger protocol
74+
@inline(__always)
7375
internal func _parseAsciiAsUInt64(
7476
_ utf16: String.UTF16View, _ radix: Int, _ maximum: UInt64
7577
) -> UInt64? {
@@ -92,6 +94,7 @@ internal func _parseAsciiAsUInt64(
9294
/// - Note: For text matching the regular expression "-0+", the result
9395
/// is `0`, not `nil`.
9496
// FIXME(integers): support a more general BinaryInteger protocol
97+
@inline(__always)
9598
internal func _parseAsciiAsInt64(
9699
_ utf16: String.UTF16View, _ radix: Int, _ maximum: Int64
97100
) -> Int64? {
@@ -109,6 +112,7 @@ internal func _parseAsciiAsInt64(
109112
}
110113

111114
/// Strip an optional single leading ASCII plus/minus sign from `utf16`.
115+
@inline(__always)
112116
private func _parseOptionalAsciiSign(
113117
_ utf16: String.UTF16View
114118
) -> (digitsUTF16: String.UTF16View, isMinus: Bool) {

stdlib/public/core/String.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import SwiftShims
1414

1515
public protocol StringProtocol
1616
: RangeReplaceableCollection, BidirectionalCollection,
17-
CustomStringConvertible, CustomDebugStringConvertible,
17+
CustomDebugStringConvertible,
1818
CustomReflectable, CustomPlaygroundQuickLookable,
1919
TextOutputStream, TextOutputStreamable,
2020
LosslessStringConvertible, ExpressibleByStringLiteral,

stdlib/public/core/StringUTF8.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ extension _StringCore {
3232
/// and the second element contains the encoded UTF-8 starting in its
3333
/// low byte. Any unused high bytes in the result will be set to
3434
/// 0xFF.
35+
@inline(__always)
3536
func _encodeSomeUTF8(from i: Int) -> (Int, _UTF8Chunk) {
3637
_sanityCheck(i <= count)
3738

stdlib/public/core/UTF8.swift

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,29 @@
99
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
1010
//
1111
//===----------------------------------------------------------------------===//
12+
extension _Unicode {
13+
@_fixed_layout
14+
public enum UTF8 {
15+
case _swift3Buffer(_Unicode.UTF8.ForwardParser)
16+
}
17+
}
18+
1219
extension _Unicode.UTF8 : UnicodeEncoding {
20+
public typealias CodeUnit = UInt8
1321
public typealias EncodedScalar = _UIntBuffer<UInt32, UInt8>
1422

1523
public static var encodedReplacementCharacter : EncodedScalar {
1624
return EncodedScalar(_storage: 0xbdbfef, _bitCount: 24)
1725
}
1826

27+
@inline(__always)
28+
@_inlineable
1929
public static func _isScalar(_ x: CodeUnit) -> Bool {
2030
return x & 0x80 == 0
2131
}
2232

33+
@inline(__always)
34+
@_inlineable
2335
public static func decode(_ source: EncodedScalar) -> UnicodeScalar {
2436
let bits = source._storage
2537
switch source._bitCount {
@@ -44,50 +56,56 @@ extension _Unicode.UTF8 : UnicodeEncoding {
4456
}
4557
}
4658

59+
@inline(__always)
60+
@_inlineable
4761
public static func encode(_ source: UnicodeScalar) -> EncodedScalar {
48-
let x = source.value
49-
if _fastPath(x < (1 << 7)) {
50-
return EncodedScalar(_storage: x, _bitCount: 8)
51-
}
52-
else if _fastPath(x < (1 << 11)) {
53-
var r = x &>> 6
54-
r |= (x & 0b11_1111) &<< 8
55-
r |= 0b1000_0000__1100_0000
56-
return EncodedScalar(_storage: r, _bitCount: 2*8)
57-
}
58-
else if _fastPath(x < (1 << 16)) {
59-
var r = x &>> 12
60-
r |= (x & 0b1111__1100_0000) &<< 2
61-
r |= (x & 0b11_1111) &<< 16
62-
r |= 0b1000_0000__1000_0000__1110_0000
63-
return EncodedScalar(_storage: r, _bitCount: 3*8)
64-
}
65-
else {
66-
var r = x &>> 18
67-
r |= (x & 0b11__1111_0000__0000_0000) &>> 4
68-
r |= (x & 0b1111__1100_0000) &<< 10
69-
r |= (x & 0b11_1111) << 24
70-
r |= 0b1000_0000__1000_0000__1000_0000__1111_0000
71-
return EncodedScalar(_storage: r, _bitCount: 4*8)
72-
}
62+
var c = source.value
63+
if _fastPath(c < (1&<<7)) {
64+
return EncodedScalar(_storage: c, _bitCount: 8)
65+
}
66+
var o = c & 0b0__0011_1111
67+
c &>>= 6
68+
o &<<= 8
69+
if _fastPath(c < (1&<<5)) {
70+
return EncodedScalar(
71+
_storage: o | c | 0b0__1000_0000__1100_0000, _bitCount: 16)
72+
}
73+
o |= c & 0b0__0011_1111
74+
c &>>= 6
75+
o &<<= 8
76+
if _fastPath(c < (1&<<4)) {
77+
return EncodedScalar(
78+
_storage: o | c | 0b0__1000_0000__1000_0000__1110_0000, _bitCount: 24)
79+
}
80+
o |= c & 0b0__0011_1111
81+
c &>>= 6
82+
o &<<= 8
83+
return EncodedScalar(
84+
_storage: o | c | 0b0__1000_0000__1000_0000__1000_0000__1111_0000,
85+
_bitCount: 32)
7386
}
7487

7588
public struct ForwardParser {
7689
public typealias _Buffer = _UIntBuffer<UInt32, UInt8>
90+
@inline(__always)
91+
@_inlineable
7792
public init() { _buffer = _Buffer() }
7893
public var _buffer: _Buffer
7994
}
8095

8196
public struct ReverseParser {
8297
public typealias _Buffer = _UIntBuffer<UInt32, UInt8>
98+
@inline(__always)
99+
@_inlineable
83100
public init() { _buffer = _Buffer() }
84101
public var _buffer: _Buffer
85102
}
86103
}
87104

88105
extension UTF8.ReverseParser : UnicodeParser, _UTFParser {
89106
public typealias Encoding = _Unicode.UTF8
90-
107+
@inline(__always)
108+
@_inlineable
91109
public func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8) {
92110
_sanityCheck(_buffer._storage & 0x80 != 0) // this case handled elsewhere
93111
if _buffer._storage & 0b0__1110_0000__1100_0000
@@ -122,6 +140,7 @@ extension UTF8.ReverseParser : UnicodeParser, _UTFParser {
122140
/// Returns the length of the invalid sequence that ends with the LSB of
123141
/// buffer.
124142
@inline(never)
143+
@_versioned
125144
func _invalidLength() -> UInt8 {
126145
if _buffer._storage & 0b0__1111_0000__1100_0000
127146
== 0b0__1110_0000__1000_0000 {
@@ -150,6 +169,8 @@ extension UTF8.ReverseParser : UnicodeParser, _UTFParser {
150169
return 1
151170
}
152171

172+
@inline(__always)
173+
@_inlineable
153174
public func _bufferedScalar(bitCount: UInt8) -> Encoding.EncodedScalar {
154175
return Encoding.EncodedScalar(
155176
_storage: _buffer._storage.byteSwapped &>> (32 - bitCount),
@@ -160,7 +181,9 @@ extension UTF8.ReverseParser : UnicodeParser, _UTFParser {
160181

161182
extension _Unicode.UTF8.ForwardParser : UnicodeParser, _UTFParser {
162183
public typealias Encoding = _Unicode.UTF8
163-
184+
185+
@inline(__always)
186+
@_inlineable
164187
public func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8) {
165188
_sanityCheck(_buffer._storage & 0x80 != 0) // this case handled elsewhere
166189

@@ -195,6 +218,7 @@ extension _Unicode.UTF8.ForwardParser : UnicodeParser, _UTFParser {
195218
/// Returns the length of the invalid sequence that starts with the LSB of
196219
/// buffer.
197220
@inline(never)
221+
@_versioned
198222
func _invalidLength() -> UInt8 {
199223
if _buffer._storage & 0b0__1100_0000__1111_0000
200224
== 0b0__1000_0000__1110_0000 {

stdlib/public/core/UTFEncoding.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ public protocol _UTFParser {
2727

2828
extension _UTFParser
2929
where Encoding.EncodedScalar == _UIntBuffer<UInt32, Encoding.CodeUnit> {
30-
30+
31+
@inline(__always)
3132
public mutating func parseScalar<I : IteratorProtocol>(
3233
from input: inout I
3334
) -> _Unicode.ParseResult<Encoding.EncodedScalar>

0 commit comments

Comments
 (0)