Skip to content

Commit e04e7ce

Browse files
authored
Merge pull request #9412 from apple/direct-transcoding
2 parents 3970397 + a100e8e commit e04e7ce

12 files changed

+422
-46
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ set(SWIFTLIB_ESSENTIAL
105105
Reverse.swift
106106
Runtime.swift.gyb
107107
SipHash.swift.gyb
108+
SentinelCollection.swift
108109
Sequence.swift
109110
SequenceAlgorithms.swift.gyb
110111
SequenceWrapper.swift

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"Repeat.swift",
4949
"Sort.swift",
5050
"Range.swift",
51+
"SentinelCollection.swift",
5152
"ClosedRange.swift",
5253
"CollectionOfOne.swift",
5354
"HeapBuffer.swift",
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
//===--- SentinelCollection.swift -----------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
/// A type that can be applied to a value of type `Input` to produce a value
/// of type `Output`.
public // @testable
protocol _Function {
  /// The type of value accepted by `apply`.
  associatedtype Input
  /// The type of value produced by `apply`.
  associatedtype Output
  /// Returns the result of applying `self` to `input`.
  func apply(_ input: Input) -> Output
}
18+
19+
/// A `_Function` whose `Output` is `Bool`.
protocol _Predicate: _Function where Output == Bool {}
20+
21+
/// An iterator over the elements of `Base` that stops (returning `nil`) at
/// the first element for which `IsSentinel` returns `true`.
///
/// The sentinel element itself is never returned. Once the sentinel has been
/// seen, every subsequent call to `next()` returns `nil` without touching
/// the base iterator.
struct _SentinelIterator<
  Base: IteratorProtocol,
  IsSentinel: _Predicate
>: IteratorProtocol, Sequence
where IsSentinel.Input == Base.Element {
  /// The underlying iterator being consumed.
  var _base: Base
  /// The predicate that recognizes the sentinel element.
  var _isSentinel: IsSentinel
  /// `true` once the sentinel has been encountered.
  var _expired: Bool = false

  /// Creates an iterator over `base` that ends just before the first element
  /// satisfying `condition`.
  init(_ base: Base, until condition: IsSentinel) {
    _base = base
    _isSentinel = condition
  }

  /// Returns the next element of the base iterator, or `nil` if the sentinel
  /// has been reached.
  mutating func next() -> Base.Element? {
    if _fastPath(!_expired) {
      let candidate = _base.next()
      // The base is force-unwrapped rather than checked for exhaustion: it is
      // treated as a precondition that the sentinel will be found before the
      // base iterator runs out.
      if _fastPath(!_isSentinel.apply(candidate!)) {
        return candidate
      }
      _expired = true
    }
    return nil
  }
}
45+
46+
/// A collection presenting the elements of `Base` that precede the first
/// element for which `IsSentinel` returns `true`.
///
/// It is treated as a precondition that the base contains a sentinel: indices
/// are formed by reading `_base[i]` without first comparing `i` against
/// `_base.endIndex`.
struct _SentinelCollection<
  Base: Collection,
  IsSentinel : _Predicate
> : Collection
where IsSentinel.Input == Base.Iterator.Element {
  /// The predicate that recognizes the sentinel element.
  let _isSentinel: IsSentinel
  /// The underlying collection.
  var _base : Base

  typealias IndexDistance = Base.IndexDistance

  /// Returns an iterator over the base that stops at the sentinel.
  func makeIterator() -> _SentinelIterator<Base.Iterator, IsSentinel> {
    return _SentinelIterator(_base.makeIterator(), until: _isSentinel)
  }

  /// A position in a `_SentinelCollection`.
  ///
  /// A non-`nil` `_impl` holds the base position together with the element
  /// read at that position; a `nil` `_impl` represents `endIndex`.
  struct Index : Comparable {
    var _impl: (position: Base.Index, element: Base.Iterator.Element)?

    /// Two indices are equal when both are the end index, or when both refer
    /// to the same base position.
    static func == (lhs: Index, rhs: Index) -> Bool {
      if rhs._impl == nil { return lhs._impl == nil }
      return lhs._impl != nil && rhs._impl!.position == lhs._impl!.position
    }

    /// Orders indices by base position, with the end index (`nil`) greater
    /// than every other index.
    static func < (lhs: Index, rhs: Index) -> Bool {
      // Anything that is not the end index precedes the end index.
      if rhs._impl == nil { return lhs._impl != nil }
      // The end index on the left never precedes a non-end index; otherwise
      // `lhs < rhs` exactly when lhs's base position precedes rhs's.
      return lhs._impl != nil && lhs._impl!.position < rhs._impl!.position
    }
  }

  /// The index of the first element, or `endIndex` if the first base element
  /// is the sentinel.
  var startIndex : Index {
    return _index(at: _base.startIndex)
  }

  /// The past-the-end index, represented by a `nil` `_impl`.
  var endIndex : Index {
    return Index(_impl: nil)
  }

  /// Returns the element stored in the index `i`; `i` must not be `endIndex`.
  subscript(i: Index) -> Base.Iterator.Element {
    return i._impl!.element
  }

  /// Returns the index following `i`; `i` must not be `endIndex`.
  func index(after i: Index) -> Index {
    return _index(at: _base.index(after: i._impl!.position))
  }

  /// Returns the index for base position `i`: `endIndex` if the element at
  /// `i` is the sentinel, otherwise an index holding `i` and that element.
  func _index(at i: Base.Index) -> Index {
    // We don't need this check if it's a precondition that the sentinel will be
    // found
    // guard _fastPath(i != _base.endIndex) else { return endIndex }
    let e = _base[i]
    guard _fastPath(!_isSentinel.apply(e)) else { return endIndex }
    return Index(_impl: (position: i, element: e))
  }

  /// Creates a collection over `base` that ends just before the first element
  /// satisfying `condition`.
  init(_ base: Base, until condition: IsSentinel) {
    _base = base
    _isSentinel = condition
  }
}
104+
105+
/// A predicate that is `true` for integer values equal to zero.
struct _IsZero<T: BinaryInteger>: _Predicate {
  /// Returns `true` if and only if `x` is zero.
  func apply(_ x: T) -> Bool {
    let zero: T = 0
    return x == zero
  }
}

stdlib/public/core/String.swift

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,45 @@ public protocol StringProtocol
4242

4343
func lowercased() -> String
4444
func uppercased() -> String
45+
46+
/// Constructs a `String` having the same contents as `codeUnits`.
47+
///
48+
/// - Parameter codeUnits: a collection of code units in
49+
/// the given `encoding`.
50+
/// - Parameter encoding: describes the encoding in which the code units
51+
/// should be interpreted.
52+
init<C: Collection, Encoding: UnicodeEncoding>(
53+
codeUnits: C, encoding: Encoding.Type
54+
)
55+
where C.Iterator.Element == Encoding.CodeUnit
56+
57+
/// Constructs a `String` having the same contents as `nulTerminatedUTF8`.
58+
///
59+
/// - Parameter nulTerminatedUTF8: a sequence of contiguous UTF-8 encoded
60+
/// bytes ending just before the first zero byte (NUL character).
61+
init(cString nulTerminatedUTF8: UnsafePointer<CChar>)
62+
63+
/// Constructs a `String` having the same contents as `nulTerminatedCodeUnits`.
64+
///
65+
/// - Parameter nulTerminatedCodeUnits: a sequence of contiguous code units in
66+
/// the given `encoding`, ending just before the first zero code unit.
67+
/// - Parameter encoding: describes the encoding in which the code units
68+
/// should be interpreted.
69+
init<Encoding: UnicodeEncoding>(
70+
cString nulTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
71+
encoding: Encoding.Type)
72+
73+
/// Invokes the given closure on the contents of the string, represented as a
74+
/// pointer to a null-terminated sequence of UTF-8 code units.
75+
func withCString<Result>(
76+
_ body: (UnsafePointer<CChar>) throws -> Result) rethrows -> Result
77+
78+
/// Invokes the given closure on the contents of the string, represented as a
79+
/// pointer to a null-terminated sequence of code units in the given encoding.
80+
func withCString<Result, Encoding: UnicodeEncoding>(
81+
encoding: Encoding.Type,
82+
_ body: (UnsafePointer<Encoding.CodeUnit>) throws -> Result
83+
) rethrows -> Result
4584
}
4685

4786
extension StringProtocol {
@@ -52,7 +91,141 @@ extension StringProtocol {
5291
}
5392
}
5493

55-
// FIXME: complexity documentation for most of methods on String is ought to be
94+
/// Calls `body` with a pointer to a zero-terminated sequence of
/// `TargetEncoding.CodeUnit`s representing the same string as `source`, when
/// `source` is interpreted as being encoded with `SourceEncoding`.
///
/// This forwards to `_withCStringAndLength` and discards the length it
/// supplies.
internal func _withCString<
  Source: Collection,
  SourceEncoding: UnicodeEncoding,
  TargetEncoding: UnicodeEncoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  return try _withCStringAndLength(
    encodedAs: targetEncoding,
    from: source,
    encodedAs: sourceEncoding,
    execute: { pointer, _ in try body(pointer) })
}
114+
115+
/// Calls `body` with a pointer to a zero-terminated sequence of
/// `TargetEncoding.CodeUnit`s transcoded from `source`, together with the
/// number of code units in that sequence.
///
/// The length passed to `body` counts only the transcoded code units; it does
/// not include the trailing zero terminator.
internal func _withCStringAndLength<
  Source: Collection,
  SourceEncoding: UnicodeEncoding,
  TargetEncoding: UnicodeEncoding,
  Result
>(
  encodedAs targetEncoding: TargetEncoding.Type,
  from source: Source,
  encodedAs sourceEncoding: SourceEncoding.Type,
  execute body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
) rethrows -> Result
where Source.Iterator.Element == SourceEncoding.CodeUnit {
  // First pass: count the transcoded code units so that the buffer can be
  // sized up front.
  var sourceUnits = source.makeIterator()
  var targetLength = 0
  SourceEncoding.ForwardParser.parse(&sourceUnits) { scalar in
    let encoded = targetEncoding.transcode(scalar, from: SourceEncoding.self)
    targetLength += numericCast(encoded.count)
  }

  // Second pass: transcode into a buffer with room for the terminator.
  var buffer: [TargetEncoding.CodeUnit] = []
  buffer.reserveCapacity(targetLength + 1)
  sourceUnits = source.makeIterator()
  SourceEncoding.ForwardParser.parse(&sourceUnits) { scalar in
    buffer.append(
      contentsOf: targetEncoding.transcode(scalar, from: SourceEncoding.self))
  }
  buffer.append(0)

  return try body(buffer, targetLength)
}
143+
144+
extension _StringCore {
  /// Calls `body` with a pointer to a null-terminated sequence of code units
  /// in `targetEncoding` corresponding to the substring in `bounds`.
  ///
  /// This forwards to `_withCSubstringAndLength` and discards the length.
  internal func _withCSubstring<Result, TargetEncoding: UnicodeEncoding>(
    in bounds: Range<Index>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    return try _withCSubstringAndLength(
      in: bounds,
      encoding: targetEncoding,
      { pointer, _ in try body(pointer) })
  }

  /// Calls `body` with a pointer to a null-terminated sequence of code units
  /// in `targetEncoding` corresponding to the substring in `bounds`, together
  /// with the length supplied by `_withCStringAndLength`.
  ///
  /// Contiguous storage is read directly as ASCII or UTF-16 code units;
  /// otherwise the slice `self[bounds]` is transcoded from UTF-16.
  internal func _withCSubstringAndLength<
    Result, TargetEncoding: UnicodeEncoding
  >(
    in bounds: Range<Index>,
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>, Int) throws -> Result
  ) rethrows -> Result {
    // Non-contiguous storage: go through the core's own slicing.
    guard _fastPath(hasContiguousStorage) else {
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: self[bounds],
        encodedAs: _Unicode.UTF16.self,
        execute: body)
    }

    // Applied to `self` after the raw storage pointers below have been used.
    defer { _fixLifetime(self) }

    if isASCII {
      let asciiUnits = UnsafeBufferPointer(start: startASCII, count: count)
      return try Swift._withCStringAndLength(
        encodedAs: targetEncoding,
        from: asciiUnits[bounds],
        encodedAs: _Unicode.ASCII.self,
        execute: body)
    }

    let utf16Units = UnsafeBufferPointer(start: startUTF16, count: count)
    return try Swift._withCStringAndLength(
      encodedAs: targetEncoding,
      from: utf16Units[bounds],
      encodedAs: _Unicode.UTF16.self,
      execute: body)
  }
}
191+
192+
extension String {
  /// Constructs a `String` having the same contents as `codeUnits`.
  ///
  /// The buffer is built with `repairIllFormedSequences: true`.
  ///
  /// - Parameter codeUnits: a collection of code units in the given
  ///   `encoding`.
  /// - Parameter encoding: describes the encoding in which the code units
  ///   should be interpreted.
  public init<C: Collection, Encoding: UnicodeEncoding>(
    codeUnits: C, encoding: Encoding.Type
  ) where C.Iterator.Element == Encoding.CodeUnit {
    let transcoded = _StringBuffer.fromCodeUnits(
      codeUnits, encoding: encoding, repairIllFormedSequences: true)
    // Only the buffer is used; the `hadError` flag is ignored.
    self = String(_StringCore(transcoded.0!))
  }

  /// Constructs a `String` having the same contents as
  /// `nulTerminatedCodeUnits`.
  ///
  /// - Parameter nulTerminatedCodeUnits: a sequence of contiguous code units
  ///   in the given `encoding`, ending just before the first zero code unit.
  /// - Parameter encoding: describes the encoding in which the code units
  ///   should be interpreted.
  public init<Encoding: UnicodeEncoding>(
    cString nulTerminatedCodeUnits: UnsafePointer<Encoding.CodeUnit>,
    encoding: Encoding.Type) {

    // View the memory as an unbounded buffer, then cut it off at the first
    // zero code unit.
    let unbounded = UnsafeBufferPointer(
      _unboundedStartingAt: nulTerminatedCodeUnits)
    let untilNul = _SentinelCollection(unbounded, until: _IsZero())
    self.init(codeUnits: untilNul, encoding: encoding)
  }

  /// Invokes the given closure on the contents of the string, represented as
  /// a pointer to a null-terminated sequence of code units in the given
  /// encoding.
  public func withCString<Result, TargetEncoding: UnicodeEncoding>(
    encoding targetEncoding: TargetEncoding.Type,
    _ body: (UnsafePointer<TargetEncoding.CodeUnit>) throws -> Result
  ) rethrows -> Result {
    let wholeString = _core.startIndex..<_core.endIndex
    return try _core._withCSubstring(
      in: wholeString, encoding: targetEncoding, body)
  }
}
228+
// FIXME: complexity documentation for most methods on String ought to be
56229
// qualified with "amortized" at least, as Characters are variable-length.
57230

58231
/// A Unicode string value.

stdlib/public/core/StringBuffer.swift

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,11 @@ public struct _StringBuffer {
9191
= ((_storage._capacity() - capacityBump) &<< 1) + elementShift
9292
}
9393

94-
static func fromCodeUnits<Input, Encoding>(
94+
static func fromCodeUnits<Input : Sequence, Encoding : UnicodeEncoding>(
9595
_ input: Input, encoding: Encoding.Type, repairIllFormedSequences: Bool,
9696
minimumCapacity: Int = 0
9797
) -> (_StringBuffer?, hadError: Bool)
98-
where
99-
Input : Collection, // Sequence?
100-
Encoding : UnicodeEncoding,
101-
Input.Iterator.Element == Encoding.CodeUnit {
98+
where Input.Iterator.Element == Encoding.CodeUnit {
10299
// Determine how many UTF-16 code units we'll need
103100
let inputStream = input.makeIterator()
104101
guard let (utf16Count, isAscii) = UTF16.transcodedLength(

stdlib/public/core/StringCore.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ public struct _StringCore {
387387
}
388388
}
389389
else if let content = _unmanagedUTF16 {
390-
var i = content.makeIterator()
390+
var i = content.makeIterator()
391391
_Unicode.UTF16.ForwardParser.parse(&i) {
392392
Encoding.transcode($0, from: UTF16.self).forEach(processCodeUnit)
393393
}

0 commit comments

Comments
 (0)