Skip to content

Eager string bridging #5489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion stdlib/public/core/Character.swift
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ public struct Character :
///
/// let oBreve: Character = "o\u{306}"
/// print(oBreve)
/// // Prints ""
/// // Prints "ŏ"
///
/// The assignment to the `oBreve` constant calls this initializer behind the
/// scenes.
Expand Down
2 changes: 1 addition & 1 deletion stdlib/public/core/ObjCMirrors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func _getObjCChild<T>(_: Int, _: _MagicMirrorData) -> (T, _Mirror)

func _getObjCSummary(_ data: _MagicMirrorData) -> String {
let theDescription = _swift_stdlib_objcDebugDescription(data._loadValue(ofType: AnyObject.self)) as AnyObject
return _cocoaStringToSwiftString_NonASCII(theDescription)
return String(_cocoaString: theDescription)
}

public // SPI(runtime)
Expand Down
26 changes: 26 additions & 0 deletions stdlib/public/core/ShadowProtocols.swift
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,36 @@ public protocol _NSNumber {
var objCType: UnsafePointer<Int8> { get }
}

/// A shadow for the API of NSString we will use in the core stdlib.
///
/// NOTE(review): the requirement names presumably have to match the
/// corresponding NSString selectors exactly — confirm against Foundation
/// before renaming anything here.
@objc
public protocol _NSStringCore {
// Length of the string. Presumably counted in UTF-16 code units, given that
// `characterAtIndex` yields `UInt16` — TODO confirm.
func length() -> Int
// The 16-bit unit stored at `index`.
func characterAtIndex(_ index: Int) -> UInt16

// Copies the units covered by `aRange` into `buffer`.
// NOTE(review): assumes the caller supplies a buffer large enough for
// `aRange` — verify against callers.
func getCharacters(
_ buffer: UnsafeMutablePointer<UInt16>,
range aRange: _SwiftNSRange)

// SPI APIs
// Optional fast path: returns a pointer to the 16-bit contents, or nil.
// Presumably nil means no such contiguous storage is available — confirm.
func _fastCharacterContents() -> UnsafeMutablePointer<UInt16>?
}

// See _NSSet above for why this exists
@unsafe_no_objc_tagged_pointer @objc
public protocol _NSString : _NSStringCore {
func getCharacters(_ buffer: UnsafeMutablePointer<UInt16>)

// SPI APIs
// TODO: stop using UInt8 as a proxy for Bool
func _fastCStringContents(_ nullTerminationRequired: UInt8)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just so you know, this is not 100% correct. ObjCBool is only UInt8 on some platforms, and it's theoretically possible that UInt8 and Bool would have different calling conventions.

Internal Clang bug rdar://problem/21170440 may provide a solution we can use in SwiftShims.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summarizing: this is a bad hack that probably works fine in practice for the time being. When https://reviews.llvm.org/D26234 is merged, we can use that to do the strictly correct thing and provide a #define in SwiftShims.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further, _fastCStringContents may need to account for the system encoding. See also #3151

-> UnsafeMutablePointer<Int8>?
}

#else

public protocol _NSArrayCore {}
public protocol _NSDictionaryCore {}
public protocol _NSSetCore {}
public protocol _NSStringCore {}

#endif
10 changes: 4 additions & 6 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,6 @@ extension String : _ExpressibleByBuiltinUTF16StringLiteral {
baseAddress: UnsafeMutableRawPointer(start),
count: Int(utf16CodeUnitCount),
elementShift: 1,
hasCocoaBuffer: false,
owner: nil))
}
}
Expand All @@ -421,7 +420,6 @@ extension String : _ExpressibleByBuiltinStringLiteral {
baseAddress: UnsafeMutableRawPointer(start),
count: Int(utf8CodeUnitCount),
elementShift: 0,
hasCocoaBuffer: false,
owner: nil))
}
else {
Expand Down Expand Up @@ -628,10 +626,10 @@ extension Sequence where Iterator.Element == String {

#if _runtime(_ObjC)
@_silgen_name("swift_stdlib_NSStringLowercaseString")
func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _CocoaString
func _stdlib_NSStringLowercaseString(_ str: AnyObject) -> _NSString

@_silgen_name("swift_stdlib_NSStringUppercaseString")
func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _CocoaString
func _stdlib_NSStringUppercaseString(_ str: AnyObject) -> _NSString
#else
internal func _nativeUnicodeLowercaseString(_ str: String) -> String {
var buffer = _StringBuffer(
Expand Down Expand Up @@ -757,7 +755,7 @@ extension String {
}

#if _runtime(_ObjC)
return _cocoaStringToSwiftString_NonASCII(
return String(_cocoaString:
_stdlib_NSStringLowercaseString(self._bridgeToObjectiveCImpl()))
#else
return _nativeUnicodeLowercaseString(self)
Expand Down Expand Up @@ -796,7 +794,7 @@ extension String {
}

#if _runtime(_ObjC)
return _cocoaStringToSwiftString_NonASCII(
return String(_cocoaString:
_stdlib_NSStringUppercaseString(self._bridgeToObjectiveCImpl()))
#else
return _nativeUnicodeUppercaseString(self)
Expand Down
198 changes: 30 additions & 168 deletions stdlib/public/core/StringBridge.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,164 +20,44 @@ import SwiftShims
/// Effectively an untyped NSString that doesn't require foundation.
public typealias _CocoaString = AnyObject

/// Returns the object produced by `_swift_stdlib_CFStringCreateCopy` for
/// `source`, passing `nil` as the first argument.
///
/// NOTE(review): the explicit `Builtin.release` presumably balances the extra
/// retain handed back by the "Create" call so the result is returned without
/// over-retaining — confirm against the shim's ownership convention.
public // @testable
func _stdlib_binary_CFStringCreateCopy(
_ source: _CocoaString
) -> _CocoaString {
let result = _swift_stdlib_CFStringCreateCopy(nil, source) as AnyObject
Builtin.release(result)
return result
}

/// Returns the length of `source` as reported by
/// `_swift_stdlib_CFStringGetLength`.
///
/// This is a direct forwarding wrapper; no conversion is applied to the
/// result.
public // @testable
func _stdlib_binary_CFStringGetLength(
_ source: _CocoaString
) -> Int {
return _swift_stdlib_CFStringGetLength(source)
}

/// Returns the pointer reported by `_swift_stdlib_CFStringGetCharactersPtr`
/// for `source`, re-typed as a mutable pointer, or `nil` if the shim returns
/// no pointer.
///
/// NOTE(review): the `mutating:` conversion only changes the pointer's type;
/// presumably callers must still treat the memory as read-only — confirm.
public // @testable
func _stdlib_binary_CFStringGetCharactersPtr(
_ source: _CocoaString
) -> UnsafeMutablePointer<UTF16.CodeUnit>? {
return UnsafeMutablePointer(mutating: _swift_stdlib_CFStringGetCharactersPtr(source))
}

/// Converts `source` to a `Swift.String` without attempting any ASCII
/// optimization: the resulting core is always built with `elementShift: 1`.
///
/// The string is copied first via `_stdlib_binary_CFStringCreateCopy`; that
/// copy supplies the length and the characters pointer, and is installed as
/// the owner of the resulting `_StringCore`.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
func _cocoaStringToSwiftString_NonASCII(
  _ source: _CocoaString
) -> String {
  let immutableCopy = _stdlib_binary_CFStringCreateCopy(source)
  let unitCount = _stdlib_binary_CFStringGetLength(immutableCopy)
  let contents = _stdlib_binary_CFStringGetCharactersPtr(immutableCopy)
  let owner = unsafeBitCast(immutableCopy, to: Optional<AnyObject>.self)

  let core = _StringCore(
    baseAddress: contents,
    count: unitCount,
    elementShift: 1,
    hasCocoaBuffer: true,
    owner: owner)
  return String(core)
}

/// Loading Foundation initializes these function variables
/// with useful values

/// Copies the code units of `source` that fall inside `range` into a freshly
/// allocated `_StringBuffer` with 2-byte elements.
///
/// The buffer's capacity is the larger of the range length and
/// `minimumCapacity`; its initial size is the range length.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
internal func _cocoaStringToContiguous(
  source: _CocoaString, range: Range<Int>, minimumCapacity: Int
) -> _StringBuffer {
  _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil,
    "Known contiguously stored strings should already be converted to Swift")

  let location = range.lowerBound
  let unitCount = range.upperBound - location
  let cfRange = _swift_shims_CFRange(location: location, length: unitCount)

  let result = _StringBuffer(
    capacity: max(unitCount, minimumCapacity),
    initialSize: unitCount,
    elementWidth: 2)
  let destination = result.start.assumingMemoryBound(
    to: _swift_shims_UniChar.self)

  // Let the source string fill the new storage itself.
  _swift_stdlib_CFStringGetCharacters(source, cfRange, destination)

  return result
}

/// Copies every code unit of `source` into `destination`.
///
/// The range requested starts at 0 and spans the length reported by
/// `_swift_stdlib_CFStringGetLength`; `destination` must point to contiguous
/// storage of sufficient capacity.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
internal func _cocoaStringReadAll(
  _ source: _CocoaString, _ destination: UnsafeMutablePointer<UTF16.CodeUnit>
) {
  let unitCount = _swift_stdlib_CFStringGetLength(source)
  let everything = _swift_shims_CFRange(location: 0, length: unitCount)
  _swift_stdlib_CFStringGetCharacters(source, everything, destination)
}

/// Returns the `_StringCore` for the part of `target` covered by `bounds`.
///
/// `target` must have a Cocoa buffer. The substring is created with
/// `_swift_stdlib_CFStringCreateWithSubstring` and then converted through
/// `String(_cocoaString:)`.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
internal func _cocoaStringSlice(
  _ target: _StringCore, _ bounds: Range<Int>
) -> _StringCore {
  _sanityCheck(target.hasCocoaBuffer)

  let cfSource: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped

  _sanityCheck(
    _swift_stdlib_CFStringGetCharactersPtr(cfSource) == nil,
    "Known contiguously stored strings should already be converted to Swift")

  let subrange = _swift_shims_CFRange(
    location: bounds.lowerBound, length: bounds.count)
  let cfSubstring = _swift_stdlib_CFStringCreateWithSubstring(
    nil, cfSource, subrange) as AnyObject

  return String(_cocoaString: cfSubstring)._core
}

/// Returns the UTF-16 code unit at `position` in the Cocoa buffer that backs
/// `target`, fetched with `_swift_stdlib_CFStringGetCharacterAtIndex`.
///
/// The Cocoa buffer is unwrapped without a check, so `target` must have one.
@_versioned
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
internal func _cocoaStringSubscript(
  _ target: _StringCore, _ position: Int
) -> UTF16.CodeUnit {
  let cfSource: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped

  _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSource) == nil,
    "Known contiguously stored strings should already be converted to Swift")

  let unit = _swift_stdlib_CFStringGetCharacterAtIndex(cfSource, position)
  return unit
}

//
// Conversion from NSString to Swift's native representation
//

/// The encoding identifier passed to the CF shims when asking for ASCII
/// contents.
///
/// NOTE(review): 0x0600 presumably has to equal CoreFoundation's
/// `kCFStringEncodingASCII`; it is hard-coded here, so verify against the CF
/// headers.
internal var kCFStringEncodingASCII : _swift_shims_CFStringEncoding {
return 0x0600
}

extension String {
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency
public // SPI(Foundation)
init(_cocoaString: AnyObject) {
// If the NSString is actually a Swift String in disguise,
// we can just copy out the internal repr.
if let wrapped = _cocoaString as? _NSContiguousString {
self._core = wrapped._core
return
}

// "copy" it into a value to be sure nobody will modify behind
// our backs. In practice, when value is already immutable, this
// just does a retain.
let cfImmutableValue
= _stdlib_binary_CFStringCreateCopy(_cocoaString) as AnyObject
let cocoaString = unsafeBitCast(_cocoaString, to: _NSString.self)

let length = _swift_stdlib_CFStringGetLength(cfImmutableValue)
let length = cocoaString.length()

// Look first for null-terminated ASCII
// Note: the code in clownfish appears to guarantee
// nul-termination, but I'm waiting for an answer from Chris Kane
// about whether we can count on it for all time or not.
let nulTerminatedASCII = _swift_stdlib_CFStringGetCStringPtr(
cfImmutableValue, kCFStringEncodingASCII)
// The 0 here stands for `nullTerminationRequired: false` (bad hack: UInt8 is used as a proxy for Bool)
let nulTerminatedASCII = cocoaString._fastCStringContents(0)

// start will hold the base pointer of contiguous storage, if it
// is found.
var start: UnsafeMutableRawPointer?
let isUTF16 = (nulTerminatedASCII == nil)
let buffer = _StringBuffer(capacity: length,
initialSize: length,
elementWidth: isUTF16 ? 2 : 1)

// We try our darndest to just get a pointer and memcpy, falling back
// to having the string do it for us with getCharacters.
if isUTF16 {
let utf16Buf = _swift_stdlib_CFStringGetCharactersPtr(cfImmutableValue)
start = UnsafeMutableRawPointer(mutating: utf16Buf)
let bufPtr = buffer.start.assumingMemoryBound(to: UInt16.self)
if let utf16Ptr = cocoaString._fastCharacterContents() {
bufPtr.assign(from: utf16Ptr, count: length)
} else {
cocoaString.getCharacters(bufPtr)
}

} else {
start = UnsafeMutableRawPointer(mutating: nulTerminatedASCII)
buffer.start.copyBytes(from: nulTerminatedASCII!, count: length)
}

self._core = _StringCore(
baseAddress: start,
count: length,
elementShift: isUTF16 ? 1 : 0,
hasCocoaBuffer: true,
owner: cfImmutableValue)
self._core = _StringCore(buffer)
}
}

Expand All @@ -187,29 +67,16 @@ extension String {
// The @_swift_native_objc_runtime_base attribute
// This allows us to subclass an Objective-C class and use the fast Swift
// memory allocator.
//
// Subclasses should provide:
// * func length() -> Int
// * func characterAtIndex(_ index: Int) -> UInt16
@objc @_swift_native_objc_runtime_base(_SwiftNativeNSStringBase)
public class _SwiftNativeNSString {}

/// The minimal NSString-like interface declared in the string bridging file;
/// it refines `_NSCopying` and `_NSFastEnumeration`.
///
/// NOTE(review): the requirement names presumably have to match the
/// corresponding NSString selectors — confirm before renaming.
@objc
public protocol _NSStringCore :
_NSCopying, _NSFastEnumeration {

// The following methods should be overridden when implementing an
// NSString subclass.

// Length of the string. Presumably in UTF-16 code units, given the `UInt16`
// element type below — TODO confirm.
func length() -> Int

// The 16-bit unit stored at `index`.
func characterAtIndex(_ index: Int) -> UInt16

// We also override the following methods for efficiency.
// (No further requirements are declared in this protocol body.)
}

/// An `NSString` built around a slice of contiguous Swift `String` storage.
public final class _NSContiguousString : _SwiftNativeNSString {
public final class _NSContiguousString : _SwiftNativeNSString, _NSStringCore {
public init(_ _core: _StringCore) {
_sanityCheck(
_core.hasContiguousStorage,
"_NSContiguousString requires contiguous storage")
self._core = _core
super.init()
}
Expand All @@ -218,16 +85,16 @@ public final class _NSContiguousString : _SwiftNativeNSString {
_sanityCheckFailure("init(coder:) not implemented for _NSContiguousString")
}

func length() -> Int {
public func length() -> Int {
return _core.count
}

func characterAtIndex(_ index: Int) -> UInt16 {
public func characterAtIndex(_ index: Int) -> UInt16 {
return _core[index]
}

@inline(__always) // Performance: To save on reference count operations.
func getCharacters(
public func getCharacters(
_ buffer: UnsafeMutablePointer<UInt16>,
range aRange: _SwiftNSRange) {

Expand All @@ -248,7 +115,7 @@ public final class _NSContiguousString : _SwiftNativeNSString {
}

@objc
func _fastCharacterContents() -> UnsafeMutablePointer<UInt16>? {
public func _fastCharacterContents() -> UnsafeMutablePointer<UInt16>? {
return _core.elementWidth == 2 ? _core.startUTF16 : nil
}

Expand Down Expand Up @@ -313,11 +180,6 @@ extension String {
/// Same as `_bridgeToObjectiveC()`, but located inside the core standard
/// library.
public func _stdlib_binary_bridgeToObjectiveCImpl() -> AnyObject {
if let ns = _core.cocoaBuffer,
_swift_stdlib_CFStringGetLength(ns) == _core.count {
return ns
}
_sanityCheck(_core.hasContiguousStorage)
return _NSContiguousString(_core)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we didn't have such a performance hit here; this will destroy all tagged-pointer strings, which are a decent memory savings for Objective-C, and it destroys the fast-path accessors we have in Foundation as well.

It really needs to implement `_fastCStringContents:` as well as `fastestEncoding` and `smallestEncoding`. Please reference rdar://problem/18525604.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the fact that we need to create an `_NSContiguousString` is an old bug that needs to be fixed for sure, but if we fix that then Swift -> ObjC is free. The only impact is on the ObjC -> Swift side.

It's an often-stated claim that we want String to become a single pointer, with tagged-pointer optimizations. It's possible we can pick up ObjC's tagged-pointer optimizations if we go that way? But we need someone to take point on the design here. I know @dabrahams is working on a lot of string stuff, but I don't know if he's been looking at this aspect of it yet.

I'll look into filling in the fastpaths, thanks!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I am looking into that aspect and intend to handle it.

}

Expand Down
17 changes: 7 additions & 10 deletions stdlib/public/core/StringComparable.swift
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
Expand Down Expand Up @@ -73,17 +74,13 @@ extension String {
// Note: this operation should be consistent with equality comparison of
// Character.
#if _runtime(_ObjC)
if self._core.hasContiguousStorage && rhs._core.hasContiguousStorage {
let lhsStr = _NSContiguousString(self._core)
let rhsStr = _NSContiguousString(rhs._core)
let res = lhsStr._unsafeWithNotEscapedSelfPointerPair(rhsStr) {
return Int(
_stdlib_compareNSStringDeterministicUnicodeCollationPointer($0, $1))
}
return res
let lhsStr = _NSContiguousString(self._core)
let rhsStr = _NSContiguousString(rhs._core)
let res = lhsStr._unsafeWithNotEscapedSelfPointerPair(rhsStr) {
return Int(
_stdlib_compareNSStringDeterministicUnicodeCollationPointer($0, $1))
}
return Int(_stdlib_compareNSStringDeterministicUnicodeCollation(
_bridgeToObjectiveCImpl(), rhs._bridgeToObjectiveCImpl()))
return res
#else
switch (_core.isASCII, rhs._core.isASCII) {
case (true, false):
Expand Down
Loading