-
Notifications
You must be signed in to change notification settings - Fork 10.5k
Eager string bridging #5489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Eager string bridging #5489
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,164 +20,44 @@ import SwiftShims | |
/// Effectively an untyped NSString that doesn't require foundation. | ||
public typealias _CocoaString = AnyObject | ||
|
||
/// Returns the object produced by calling
/// `_swift_stdlib_CFStringCreateCopy(nil, source)` on `source`.
///
/// - Parameter source: The Cocoa string to copy.
/// - Returns: The copy, typed as `_CocoaString` (`AnyObject`).
public // @testable | ||
func _stdlib_binary_CFStringCreateCopy( | ||
_ source: _CocoaString | ||
) -> _CocoaString { | ||
let result = _swift_stdlib_CFStringCreateCopy(nil, source) as AnyObject | ||
// NOTE(review): one explicit release is issued on the copy before it is
// returned — presumably to balance the +1 of the "Create" call; confirm.
Builtin.release(result) | ||
return result | ||
} | ||
|
||
/// Returns the value of `_swift_stdlib_CFStringGetLength(source)`.
///
/// - Parameter source: The Cocoa string to measure.
/// - Returns: The length as `Int` (presumably in UTF-16 code units — confirm
///   against the shim's declaration).
public // @testable | ||
func _stdlib_binary_CFStringGetLength( | ||
_ source: _CocoaString | ||
) -> Int { | ||
return _swift_stdlib_CFStringGetLength(source) | ||
} | ||
|
||
/// Returns the pointer obtained from
/// `_swift_stdlib_CFStringGetCharactersPtr(source)`, re-typed as a mutable
/// pointer to UTF-16 code units.
///
/// - Parameter source: The Cocoa string to query.
/// - Returns: An optional pointer; callers must handle `nil`.
public // @testable | ||
func _stdlib_binary_CFStringGetCharactersPtr( | ||
_ source: _CocoaString | ||
) -> UnsafeMutablePointer<UTF16.CodeUnit>? { | ||
// `mutating:` only changes the pointer's type; nothing is written here.
return UnsafeMutablePointer(mutating: _swift_stdlib_CFStringGetCharactersPtr(source)) | ||
} | ||
|
||
/// Bridges `source` to `Swift.String`, assuming that `source` has non-ASCII | ||
/// characters (does not apply ASCII optimizations). | ||
///
/// The resulting string's core is built with `elementShift: 1` and
/// `hasCocoaBuffer: true`, and uses the copied Cocoa string as its owner.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
func _cocoaStringToSwiftString_NonASCII( | ||
_ source: _CocoaString | ||
) -> String { | ||
// Length and character pointer are both read from the copy, not `source`.
let cfImmutableValue = _stdlib_binary_CFStringCreateCopy(source) | ||
let length = _stdlib_binary_CFStringGetLength(cfImmutableValue) | ||
// NOTE(review): `start` is optional and is passed on without a nil check.
let start = _stdlib_binary_CFStringGetCharactersPtr(cfImmutableValue) | ||
|
||
return String(_StringCore( | ||
baseAddress: start, | ||
count: length, | ||
elementShift: 1, | ||
hasCocoaBuffer: true, | ||
owner: unsafeBitCast(cfImmutableValue, to: Optional<AnyObject>.self))) | ||
} | ||
|
||
/// Loading Foundation initializes these function variables | ||
/// with useful values | ||
|
||
/// Produces a `_StringBuffer` from a given subrange of a source | ||
/// `_CocoaString`, having the given minimum capacity. | ||
///
/// - Parameters:
///   - source: The Cocoa string to read from. A sanity check expects that
///     `_swift_stdlib_CFStringGetCharactersPtr(source)` returns `nil`.
///   - range: The subrange of `source` to copy.
///   - minimumCapacity: Lower bound on the capacity of the returned buffer.
/// - Returns: A buffer created with `elementWidth: 2` and an initial size of
///   `range.upperBound - range.lowerBound`.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
internal func _cocoaStringToContiguous( | ||
source: _CocoaString, range: Range<Int>, minimumCapacity: Int | ||
) -> _StringBuffer { | ||
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil, | ||
"Known contiguously stored strings should already be converted to Swift") | ||
|
||
let startIndex = range.lowerBound | ||
let count = range.upperBound - startIndex | ||
|
||
// Capacity is the larger of the element count and the requested minimum.
let buffer = _StringBuffer(capacity: max(count, minimumCapacity), | ||
initialSize: count, elementWidth: 2) | ||
|
||
// Have the shim copy `count` characters, starting at `startIndex`, into the
// start of the new buffer.
_swift_stdlib_CFStringGetCharacters( | ||
source, _swift_shims_CFRange(location: startIndex, length: count), | ||
buffer.start.assumingMemoryBound(to: _swift_shims_UniChar.self)) | ||
|
||
return buffer | ||
} | ||
|
||
/// Reads the entire contents of a _CocoaString into contiguous | ||
/// storage of sufficient capacity. | ||
///
/// - Parameters:
///   - source: The Cocoa string to read.
///   - destination: Where the characters are written. This function performs
///     no capacity check; the caller must provide room for
///     `_swift_stdlib_CFStringGetLength(source)` code units.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
internal func _cocoaStringReadAll( | ||
_ source: _CocoaString, _ destination: UnsafeMutablePointer<UTF16.CodeUnit> | ||
) { | ||
// The requested range starts at 0 and spans the string's full length.
_swift_stdlib_CFStringGetCharacters( | ||
source, _swift_shims_CFRange( | ||
location: 0, length: _swift_stdlib_CFStringGetLength(source)), destination) | ||
} | ||
|
||
/// Returns the core of a `String` built from the substring of `target`'s
/// Cocoa buffer that `bounds` covers.
///
/// - Parameters:
///   - target: A core for which `hasCocoaBuffer` is expected to be true.
///   - bounds: The range to extract from the Cocoa buffer.
/// - Returns: The `_core` of `String(_cocoaString:)` applied to the substring.
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
internal func _cocoaStringSlice( | ||
_ target: _StringCore, _ bounds: Range<Int> | ||
) -> _StringCore { | ||
_sanityCheck(target.hasCocoaBuffer) | ||
|
||
// Unwrapped without a runtime check; relies on the sanity check above.
let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped | ||
|
||
_sanityCheck( | ||
_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, | ||
"Known contiguously stored strings should already be converted to Swift") | ||
|
||
let cfResult = _swift_stdlib_CFStringCreateWithSubstring( | ||
nil, cfSelf, _swift_shims_CFRange( | ||
location: bounds.lowerBound, length: bounds.count)) as AnyObject | ||
|
||
return String(_cocoaString: cfResult)._core | ||
} | ||
|
||
/// Returns the UTF-16 code unit at `position`, fetched from `target`'s Cocoa
/// buffer via `_swift_stdlib_CFStringGetCharacterAtIndex`.
///
/// - Parameters:
///   - target: A core whose `cocoaBuffer` must be non-nil.
///   - position: The index passed to the shim; it is not range-checked here.
@_versioned | ||
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
internal func _cocoaStringSubscript( | ||
_ target: _StringCore, _ position: Int | ||
) -> UTF16.CodeUnit { | ||
// NOTE(review): unlike `_cocoaStringSlice`, there is no
// `_sanityCheck(target.hasCocoaBuffer)` before this unchecked unwrap.
let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped | ||
|
||
_sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, | ||
"Known contiguously stored strings should already be converted to Swift") | ||
|
||
return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position) | ||
} | ||
|
||
// | ||
// Conversion from NSString to Swift's native representation | ||
// | ||
|
||
/// The encoding value `0x0600`, passed to `_swift_stdlib_CFStringGetCStringPtr`
/// when looking for ASCII contents. Presumably mirrors CoreFoundation's
/// constant of the same name — confirm against the CF headers.
internal var kCFStringEncodingASCII : _swift_shims_CFStringEncoding { | ||
return 0x0600 | ||
} | ||
|
||
extension String { | ||
@inline(never) @_semantics("stdlib_binary_only") // Hide the CF dependency | ||
public // SPI(Foundation) | ||
init(_cocoaString: AnyObject) { | ||
// If the NSString is actually a Swift String in disguise, | ||
// we can just copy out the internal repr. | ||
if let wrapped = _cocoaString as? _NSContiguousString { | ||
self._core = wrapped._core | ||
return | ||
} | ||
|
||
// "copy" it into a value to be sure nobody will modify behind | ||
// our backs. In practice, when value is already immutable, this | ||
// just does a retain. | ||
let cfImmutableValue | ||
= _stdlib_binary_CFStringCreateCopy(_cocoaString) as AnyObject | ||
let cocoaString = unsafeBitCast(_cocoaString, to: _NSString.self) | ||
|
||
let length = _swift_stdlib_CFStringGetLength(cfImmutableValue) | ||
let length = cocoaString.length() | ||
|
||
// Look first for null-terminated ASCII | ||
// Note: the code in clownfish appears to guarantee | ||
// nul-termination, but I'm waiting for an answer from Chris Kane | ||
// about whether we can count on it for all time or not. | ||
let nulTerminatedASCII = _swift_stdlib_CFStringGetCStringPtr( | ||
cfImmutableValue, kCFStringEncodingASCII) | ||
// The 0 here is `nullRequired: false` (bad hack) | ||
let nulTerminatedASCII = cocoaString._fastCStringContents(0) | ||
|
||
// start will hold the base pointer of contiguous storage, if it | ||
// is found. | ||
var start: UnsafeMutableRawPointer? | ||
let isUTF16 = (nulTerminatedASCII == nil) | ||
let buffer = _StringBuffer(capacity: length, | ||
initialSize: length, | ||
elementWidth: isUTF16 ? 2 : 1) | ||
|
||
// We try our darndest to just get a pointer and memcpy, falling back | ||
// to having the string do it for us with getCharacters. | ||
if isUTF16 { | ||
let utf16Buf = _swift_stdlib_CFStringGetCharactersPtr(cfImmutableValue) | ||
start = UnsafeMutableRawPointer(mutating: utf16Buf) | ||
let bufPtr = buffer.start.assumingMemoryBound(to: UInt16.self) | ||
if let utf16Ptr = cocoaString._fastCharacterContents() { | ||
bufPtr.assign(from: utf16Ptr, count: length) | ||
} else { | ||
cocoaString.getCharacters(bufPtr) | ||
} | ||
|
||
} else { | ||
start = UnsafeMutableRawPointer(mutating: nulTerminatedASCII) | ||
buffer.start.copyBytes(from: nulTerminatedASCII!, count: length) | ||
} | ||
|
||
self._core = _StringCore( | ||
baseAddress: start, | ||
count: length, | ||
elementShift: isUTF16 ? 1 : 0, | ||
hasCocoaBuffer: true, | ||
owner: cfImmutableValue) | ||
self._core = _StringCore(buffer) | ||
} | ||
} | ||
|
||
|
@@ -187,29 +67,16 @@ extension String { | |
// The @_swift_native_objc_runtime_base attribute | ||
// This allows us to subclass an Objective-C class and use the fast Swift | ||
// memory allocator. | ||
// | ||
// Subclasses should provide: | ||
// * func length() -> Int | ||
// * func characterAtIndex(_ index: Int) -> UInt16 | ||
/// An empty `@objc` class whose Objective-C runtime base is declared as
/// `_SwiftNativeNSStringBase`; `_NSContiguousString` derives from it.
@objc @_swift_native_objc_runtime_base(_SwiftNativeNSStringBase) | ||
public class _SwiftNativeNSString {} | ||
|
||
/// The core requirements for an NSString-like object: `length()` and
/// `characterAtIndex(_:)`, on top of `_NSCopying` and `_NSFastEnumeration`.
@objc | ||
public protocol _NSStringCore : | ||
_NSCopying, _NSFastEnumeration { | ||
|
||
// The following methods should be overridden when implementing an | ||
// NSString subclass. | ||
|
||
/// Returns the length of the string.
func length() -> Int | ||
|
||
/// Returns the `UInt16` value at `index`.
func characterAtIndex(_ index: Int) -> UInt16 | ||
|
||
// NOTE(review): no requirements follow the comment below in this view.
// We also override the following methods for efficiency. | ||
} | ||
|
||
/// An `NSString` built around a slice of contiguous Swift `String` storage. | ||
public final class _NSContiguousString : _SwiftNativeNSString { | ||
public final class _NSContiguousString : _SwiftNativeNSString, _NSStringCore { | ||
public init(_ _core: _StringCore) { | ||
_sanityCheck( | ||
_core.hasContiguousStorage, | ||
"_NSContiguousString requires contiguous storage") | ||
self._core = _core | ||
super.init() | ||
} | ||
|
@@ -218,16 +85,16 @@ public final class _NSContiguousString : _SwiftNativeNSString { | |
_sanityCheckFailure("init(coder:) not implemented for _NSContiguousString") | ||
} | ||
|
||
func length() -> Int { | ||
public func length() -> Int { | ||
return _core.count | ||
} | ||
|
||
func characterAtIndex(_ index: Int) -> UInt16 { | ||
public func characterAtIndex(_ index: Int) -> UInt16 { | ||
return _core[index] | ||
} | ||
|
||
@inline(__always) // Performance: To save on reference count operations. | ||
func getCharacters( | ||
public func getCharacters( | ||
_ buffer: UnsafeMutablePointer<UInt16>, | ||
range aRange: _SwiftNSRange) { | ||
|
||
|
@@ -248,7 +115,7 @@ public final class _NSContiguousString : _SwiftNativeNSString { | |
} | ||
|
||
@objc | ||
func _fastCharacterContents() -> UnsafeMutablePointer<UInt16>? { | ||
public func _fastCharacterContents() -> UnsafeMutablePointer<UInt16>? { | ||
return _core.elementWidth == 2 ? _core.startUTF16 : nil | ||
} | ||
|
||
|
@@ -313,11 +180,6 @@ extension String { | |
/// Same as `_bridgeToObjectiveC()`, but located inside the core standard | ||
/// library. | ||
public func _stdlib_binary_bridgeToObjectiveCImpl() -> AnyObject { | ||
if let ns = _core.cocoaBuffer, | ||
_swift_stdlib_CFStringGetLength(ns) == _core.count { | ||
return ns | ||
} | ||
_sanityCheck(_core.hasContiguousStorage) | ||
return _NSContiguousString(_core) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be nice if we didn't have such a performance hit here; this will destroy all tagged pointer strings which is a decent memory savings to objc, and it destroys the fast-path accessors we have in Foundation as well. it really needs to implement the _fastCStringContents: as well as fastestEncoding and smallestEncoding. Please reference rdar://problem/18525604 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the fact that we need to create an NsContiguousString is an old bug that needs to be fixed for sure, but if we fix that then Swift->Objc is free. The only impact is at the Objc -> Swift part. It's an often stated claim that we want String to become a single pointer, with tagged pointer optimizations. It's possible we can pick up objc's tagged pointer opts if we go that way? But we need someone to take point on the design here. I know @dabrahams is working on a lot of string stuff, but idk if he's been looking at this aspect of it yet. I'll look into filling in the fastpaths, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I am looking into that aspect and intend to handle it. |
||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just so you know, this is not 100% correct. ObjCBool is only UInt8 on some platforms, and it's theoretically possible that UInt8 and Bool would have different calling conventions.
Internal Clang bug rdar://problem/21170440 may provide a solution we can use in SwiftShims.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Summarizing: this is a bad hack that probably works fine in practice for the time being. When https://reviews.llvm.org/D26234 is merged, we can use that to do the strictly correct thing and provide a `#define` in SwiftShims.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Further, `_fastCStringContents` may need to account for the system encoding. See also #3151.