Skip to content

Commit fd95180

Browse files
authored
Merge pull request swiftlang#9 from lorentey/string_guts
More updates & fixes to failing tests
2 parents 5ebbd8e + adb5d21 commit fd95180

13 files changed: +495 / -202 lines changed

stdlib/public/core/StringCharacterView.swift

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -70,16 +70,10 @@ extension String {
7070
@_versioned
7171
internal var _guts: _StringGuts
7272

73-
@_versioned
74-
internal var _core: _LegacyStringCore {
75-
get { return _guts._legacyCore }
76-
set { self._guts = _StringGuts(newValue) }
77-
}
78-
79-
/// The offset of this view's `_core` from an original core. This works
80-
/// around the fact that `_LegacyStringCore` is always zero-indexed.
81-
/// `_coreOffset` should be subtracted from `UnicodeScalarIndex.encodedOffset`
82-
/// before that value is used as a `_core` index.
73+
/// The offset of this view's `_guts` from an original guts. This works
74+
/// around the fact that `_StringGuts` is always zero-indexed.
75+
/// `_coreOffset` should be subtracted from `Index.encodedOffset` before
76+
/// that value is used as a `_guts` index.
8377
@_versioned
8478
internal var _coreOffset: Int
8579

@@ -794,8 +788,9 @@ extension String._CharacterView {
794788
/// Objective-C, where *n* is the length of the string; otherwise, O(1).
795789
@_inlineable // FIXME(sil-serialize-all)
796790
public subscript(bounds: Range<Index>) -> String.CharacterView {
797-
return String._CharacterView(
798-
unicodeScalars[bounds]._guts,
799-
coreOffset: bounds.lowerBound.encodedOffset)
791+
let scalarSlice: String.UnicodeScalarView = unicodeScalars[bounds]
792+
return String.CharacterView(
793+
scalarSlice._guts,
794+
coreOffset: scalarSlice._coreOffset)
800795
}
801796
}

stdlib/public/core/StringGuts.swift

Lines changed: 70 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,7 @@ extension _StringGuts {
169169
get { return _twoByteCodeUnitBit }
170170
}
171171

172+
@_inlineable
172173
public // @testable
173174
var isSingleByte: Bool {
174175
@inline(__always) get {
@@ -178,11 +179,13 @@ extension _StringGuts {
178179
}
179180
}
180181

182+
@_inlineable
181183
public // @testable
182184
var byteWidth: Int {
183185
@inline(__always) get { return isSingleByte ? 1 : 2 }
184186
}
185187

188+
@_inlineable
186189
@_versioned
187190
var bitWidth: Int {
188191
@inline(__always) get { return byteWidth &<< 3 }
@@ -264,7 +267,7 @@ extension _StringGuts {
264267
@_versioned
265268
internal
266269
var _isEmptyLiteral: Bool {
267-
return _isUnmanaged && _unmanagedRawStart == _emptyStringBase
270+
return _isUnmanaged && _unmanagedCount == 0
268271
}
269272
}
270273

@@ -976,6 +979,7 @@ extension _StringGuts {
976979
@_inlineable
977980
public // @testable
978981
func _extractSlice(_ range: Range<Int>) -> _StringGuts {
982+
if range.isEmpty { return _StringGuts() }
979983
if range == 0..<count { return self }
980984
switch (isASCII, _isUnmanaged) {
981985
case (true, true):
@@ -1265,23 +1269,26 @@ extension _StringGuts {
12651269
}
12661270

12671271
extension _StringGuts {
1268-
public mutating func replaceSubrange<C>(
1272+
@_versioned
1273+
mutating func _replaceSubrange<C, CodeUnit>(
12691274
_ bounds: Range<Int>,
1270-
with newElements: C
1271-
) where C : Collection, C.Element == UTF16.CodeUnit {
1275+
with newElements: C,
1276+
of codeUnit: CodeUnit.Type
1277+
) where C : Collection, C.Element == UTF16.CodeUnit,
1278+
CodeUnit : FixedWidthInteger & UnsignedInteger {
12721279
_precondition(bounds.lowerBound >= 0,
12731280
"replaceSubrange: subrange start precedes String start")
12741281

12751282
let newCount: Int = numericCast(newElements.count)
12761283
let deltaCount = newCount - bounds.count
12771284
let paramsOpt = allocationParametersForMutableStorage(
1278-
of: UTF16.CodeUnit.self,
1285+
of: CodeUnit.self,
12791286
unusedCapacity: Swift.max(0, deltaCount))
12801287

12811288
if _fastPath(paramsOpt == nil) {
12821289
// We have unique native storage of the correct code unit,
12831290
// with enough capacity to do the replacement inline.
1284-
unowned(unsafe) let storage = _nativeStorage(of: UTF16.CodeUnit.self)
1291+
unowned(unsafe) let storage = _nativeStorage(of: CodeUnit.self)
12851292
_sanityCheck(storage.unusedCapacity >= deltaCount)
12861293
let tailCount = storage.count - bounds.upperBound
12871294
_precondition(tailCount >= 0,
@@ -1294,10 +1301,11 @@ extension _StringGuts {
12941301
count: tailCount)
12951302
}
12961303
// Copy new elements in place
1297-
let (_, end) = UnsafeMutableBufferPointer(
1298-
start: dst,
1299-
count: newCount).initialize(from: newElements)
1300-
_precondition(end == newCount, "Collection misreported its count")
1304+
var it = newElements.makeIterator()
1305+
for p in dst ..< (dst + newCount) {
1306+
p.pointee = CodeUnit(it.next()!)
1307+
}
1308+
_precondition(it.next() == nil, "Collection misreported its count")
13011309
storage.count += deltaCount
13021310
_nativeCount += deltaCount
13031311
_invariantCheck()
@@ -1309,7 +1317,7 @@ extension _StringGuts {
13091317
let params = paramsOpt._unsafelyUnwrappedUnchecked
13101318
_precondition(bounds.upperBound <= params.count,
13111319
"replaceSubrange: subrange extends past String end")
1312-
let storage = _SwiftStringStorage<UTF16.CodeUnit>.create(
1320+
let storage = _SwiftStringStorage<CodeUnit>.create(
13131321
capacity: params.capacity,
13141322
count: params.count + deltaCount)
13151323
var dst = storage.start
@@ -1321,9 +1329,11 @@ extension _StringGuts {
13211329
dst += prefixRange.count
13221330

13231331
// Copy new data
1324-
let (_, end) = UnsafeMutableBufferPointer(
1325-
start: dst, count: newCount).initialize(from: newElements)
1326-
_precondition(end == newCount, "Collection misreported its count")
1332+
var it = newElements.makeIterator()
1333+
for p in dst ..< (dst + newCount) {
1334+
p.pointee = CodeUnit(it.next()!)
1335+
}
1336+
_precondition(it.next() == nil, "Collection misreported its count")
13271337
dst += newCount
13281338

13291339
// Copy suffix from end of replaced range
@@ -1335,12 +1345,58 @@ extension _StringGuts {
13351345
self = _StringGuts(storage)
13361346
_invariantCheck()
13371347
}
1348+
1349+
public mutating func replaceSubrange<C>(
1350+
_ bounds: Range<Int>,
1351+
with newElements: C
1352+
) where C : Collection, C.Element == UTF16.CodeUnit {
1353+
if isASCII && !newElements.contains(where: {$0 > 0x7f}) {
1354+
self._replaceSubrange(bounds, with: newElements, of: UInt8.self)
1355+
} else {
1356+
self._replaceSubrange(bounds, with: newElements, of: UTF16.CodeUnit.self)
1357+
}
1358+
}
13381359
}
13391360

13401361
//
13411362
// String API
13421363
//
13431364

1365+
// UnicodeScalarView operations
1366+
extension _StringGuts {
1367+
@_inlineable // FIXME(sil-serialize-all)
1368+
@_versioned // FIXME(sil-serialize-all)
1369+
func _unicodeScalarWidth(startingAt offset: Int) -> Int {
1370+
if _slowPath(_isOpaque) {
1371+
return _asOpaque()._unicodeScalarWidth(startingAt: offset)
1372+
}
1373+
if isASCII { return 1 }
1374+
return _unmanagedUTF16View._unicodeScalarWidth(startingAt: offset)
1375+
}
1376+
1377+
@_inlineable // FIXME(sil-serialize-all)
1378+
@_versioned // FIXME(sil-serialize-all)
1379+
func _unicodeScalarWidth(endingAt offset: Int) -> Int {
1380+
if _slowPath(_isOpaque) {
1381+
return _asOpaque()._unicodeScalarWidth(endingAt: offset)
1382+
}
1383+
if isASCII { return 1 }
1384+
return _unmanagedUTF16View._unicodeScalarWidth(endingAt: offset)
1385+
}
1386+
1387+
@_inlineable // FIXME(sil-serialize-all)
1388+
@_versioned // FIXME(sil-serialize-all)
1389+
func _decodeUnicodeScalar(startingAt offset: Int) -> UnicodeDecodingResult {
1390+
if _slowPath(_isOpaque) {
1391+
return _asOpaque()._decodeUnicodeScalar(startingAt: offset)
1392+
}
1393+
if isASCII {
1394+
return _unmanagedASCIIView._decodeUnicodeScalar(startingAt: offset)
1395+
}
1396+
return _unmanagedUTF16View._decodeUnicodeScalar(startingAt: offset)
1397+
}
1398+
}
1399+
13441400
// Some CharacterView operations
13451401
extension String {
13461402
/// Accesses the character at the given position.

stdlib/public/core/StringUTF16.swift

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -214,29 +214,28 @@ extension String {
214214

215215
let index = _internalIndex(at: i.encodedOffset)
216216
let u = _guts[index]
217-
if _fastPath((u &>> 11) != 0b1101_1) {
217+
if _fastPath(UTF16._isScalar(u)) {
218218
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
219219
// of 1 code unit.
220220
return u
221221
}
222222

223-
if (u &>> 10) == 0b1101_10 {
224-
// `u` is a high-surrogate. Sequence is well-formed if it
225-
// is followed by a low-surrogate.
223+
if UTF16.isLeadSurrogate(u) {
224+
// Sequence is well-formed if `u` is followed by a low-surrogate.
226225
if _fastPath(
227-
index + 1 < _guts.count &&
228-
(_guts[index + 1] &>> 10) == 0b1101_11) {
226+
index + 1 < _guts.count &&
227+
UTF16.isTrailSurrogate(_guts[index + 1])) {
229228
return u
230229
}
231-
return 0xfffd
230+
return UTF16._replacementCodeUnit
232231
}
233232

234233
// `u` is a low-surrogate. Sequence is well-formed if
235234
// previous code unit is a high-surrogate.
236-
if _fastPath(index != 0 && (_guts[index - 1] &>> 10) == 0b1101_10) {
235+
if _fastPath(index != 0 && UTF16.isLeadSurrogate(_guts[index - 1])) {
237236
return u
238237
}
239-
return 0xfffd
238+
return UTF16._replacementCodeUnit
240239
}
241240

242241
#if _runtime(_ObjC)

0 commit comments

Comments
 (0)