-
Notifications
You must be signed in to change notification settings - Fork 10.5k
Avoid attempting to create SmallStrings for constant tagged CFStrings #30966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,14 @@ typedef unsigned char _swift_shims_Boolean; | |
typedef __swift_uint8_t _swift_shims_UInt8; | ||
typedef __swift_uint32_t _swift_shims_CFStringEncoding; | ||
|
||
/* This is layout-compatible with constant CFStringRefs on Darwin */ | ||
typedef struct __swift_shims_builtin_CFString { | ||
const void * _Nonnull isa; // point to __CFConstantStringClassReference | ||
unsigned long flags; | ||
const __swift_uint8_t * _Nonnull str; | ||
unsigned long length; | ||
} _swift_shims_builtin_CFString; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it matter that this type is mismatched on Linux? There are three different ABIs in play :-(: Default (ObjC) ( struct __NSConstantString_tag {
const int *isa;
int flags;
const char *str;
long length;
} __NSConstantString; Swift 4.1, 4.2 ( typedef struct __NSConstantString_tag {
uintptr_t _cfisa;
uintptr_t _swift_rc;
_Atomic(uint64_t) _cfinfoa;
const char *_ptr;
uint32_t _length;
} __NSConstantString; Swift 5.0 ( typedef struct __NSConstantString_tag {
uintptr_t _cfisa;
uintptr_t _swift_rc;
_Atomic(uint64_t) _cfinfoa;
const char *_ptr;
uintptr_t _length;
} __NSConstantString; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is only intended for Darwin, but maybe there should be a guard here to make it explicit and prevent anyone from mistakenly using it more widely in the future. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the CF shims file is already only built on Darwin, but thank you for pointing this out, it's good to be aware of. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is meant to be Darwin only, I think that we should do something to make that explicit (perhaps move the file) - as it is included in the Linux/Android/Windows SDKs. |
||
|
||
SWIFT_RUNTIME_STDLIB_API | ||
__swift_uint8_t _swift_stdlib_isNSString(id _Nonnull obj); | ||
|
||
|
@@ -62,6 +70,10 @@ _swift_stdlib_NSStringGetCStringTrampoline(id _Nonnull obj, | |
_swift_shims_UInt8 *_Nonnull buffer, | ||
_swift_shims_CFIndex maxLength, | ||
unsigned long encoding); | ||
|
||
SWIFT_RUNTIME_STDLIB_API | ||
__swift_uint8_t | ||
_swift_stdlib_dyld_is_objc_constant_string(const void * _Nonnull addr); | ||
|
||
#endif // __OBJC2__ | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,9 +102,20 @@ private func _NSStringLen(_ str: _StringSelectorHolder) -> Int { | |
internal func _stdlib_binary_CFStringGetLength( | ||
_ source: _CocoaString | ||
) -> Int { | ||
if let len = getConstantTaggedCocoaContents(source)?.utf16Length { | ||
return len | ||
} | ||
return _NSStringLen(_objc(source)) | ||
} | ||
|
||
@_effects(readonly) | ||
internal func _isNSString(_ str:AnyObject) -> Bool { | ||
if getConstantTaggedCocoaContents(str) != nil { | ||
return true | ||
} | ||
return _swift_stdlib_isNSString(str) != 0 | ||
} | ||
|
||
@_effects(readonly) | ||
private func _NSStringCharactersPtr(_ str: _StringSelectorHolder) -> UnsafeMutablePointer<UTF16.CodeUnit>? { | ||
return UnsafeMutablePointer(mutating: str._fastCharacterContents()) | ||
|
@@ -288,13 +299,24 @@ internal enum _KnownCocoaString { | |
#if !(arch(i386) || arch(arm) || arch(wasm32)) | ||
case tagged | ||
#endif | ||
#if arch(arm64) | ||
case constantTagged | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it that big a deal that this is separated out as only arm64? Or more broadly, do we want to diverge the code cross any platforms in the enum definition itself, or just at the create/use site? Even more broadly, do we want to diverge at the use site or only at the creation site? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't want to rely on the x86_64 tagged pointer format not accidentally overlapping the values for these, so I think it is unfortunately necessary for now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But why is that needed for the declaration of the type, in contrast to the code that chooses what case to construct? |
||
#endif | ||
|
||
@inline(__always) | ||
init(_ str: _CocoaString) { | ||
|
||
#if !(arch(i386) || arch(arm)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's unfortunate that the Maybe we could have a private var platformSupportsObjCTaggedPointer: Bool {
#if ...
return true
# else
return false
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be nice! |
||
if _isObjCTaggedPointer(str) { | ||
#if arch(arm64) | ||
if let _ = getConstantTaggedCocoaContents(str) { | ||
self = .constantTagged | ||
} else { | ||
self = .tagged | ||
} | ||
#else | ||
self = .tagged | ||
#endif | ||
return | ||
} | ||
#endif | ||
|
@@ -333,8 +355,42 @@ private func _NSStringASCIIPointer(_ str: _StringSelectorHolder) -> UnsafePointe | |
} | ||
|
||
@_effects(readonly) // @opaque | ||
internal func _cocoaASCIIPointer(_ str: _CocoaString) -> UnsafePointer<UInt8>? { | ||
return _NSStringASCIIPointer(_objc(str)) | ||
private func _withCocoaASCIIPointer<R>( | ||
_ str: _CocoaString, | ||
requireStableAddress: Bool, | ||
work: (UnsafePointer<UInt8>) -> R? | ||
) -> R? { | ||
#if !(arch(i386) || arch(arm)) | ||
if _isObjCTaggedPointer(str) { | ||
if let ptr = getConstantTaggedCocoaContents(str)?.asciiContentsPointer { | ||
return work(ptr) | ||
} | ||
if requireStableAddress { | ||
return nil // tagged pointer strings don't support _fastCStringContents | ||
} | ||
let tmp = _StringGuts(_SmallString(taggedCocoa: str)) | ||
return tmp.withFastUTF8 { work($0.baseAddress._unsafelyUnwrappedUnchecked) } | ||
} | ||
#endif | ||
defer { _fixLifetime(str) } | ||
if let ptr = _NSStringASCIIPointer(_objc(str)) { | ||
return work(ptr) | ||
} | ||
return nil | ||
} | ||
|
||
@_effects(readonly) // @opaque | ||
internal func withCocoaASCIIPointer<R>( | ||
_ str: _CocoaString, | ||
work: (UnsafePointer<UInt8>) -> R? | ||
) -> R? { | ||
return _withCocoaASCIIPointer(str, requireStableAddress: false, work: work) | ||
} | ||
|
||
@_effects(readonly) | ||
internal func stableCocoaASCIIPointer(_ str: _CocoaString) | ||
-> UnsafePointer<UInt8>? { | ||
return _withCocoaASCIIPointer(str, requireStableAddress: true, work: { $0 }) | ||
} | ||
|
||
private enum CocoaStringPointer { | ||
|
@@ -348,16 +404,61 @@ private enum CocoaStringPointer { | |
private func _getCocoaStringPointer( | ||
_ cfImmutableValue: _CocoaString | ||
) -> CocoaStringPointer { | ||
if let asciiPtr = _cocoaASCIIPointer(cfImmutableValue) { | ||
// NOTE: CFStringGetCStringPointer means ASCII | ||
return .ascii(asciiPtr) | ||
if let ascii = stableCocoaASCIIPointer(cfImmutableValue) { | ||
return .ascii(ascii) | ||
} | ||
if let utf16Ptr = _stdlib_binary_CFStringGetCharactersPtr(cfImmutableValue) { | ||
return .utf16(utf16Ptr) | ||
} | ||
return .none | ||
} | ||
|
||
|
||
@inline(__always) | ||
private func getConstantTaggedCocoaContents(_ cocoaString: _CocoaString) -> | ||
(utf16Length: Int, asciiContentsPointer: UnsafePointer<UInt8>?)? { | ||
#if !arch(arm64) | ||
return nil | ||
#else | ||
|
||
guard _isObjCTaggedPointer(cocoaString) else { | ||
return nil | ||
} | ||
|
||
let taggedValue = unsafeBitCast(cocoaString, to: UInt.self) | ||
|
||
//11000000..payload..111 | ||
let tagMask:UInt = | ||
0b1111_1111_1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0111 | ||
let expectedValue:UInt = | ||
0b1100_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0111 | ||
|
||
guard taggedValue & tagMask == expectedValue else { | ||
return nil | ||
} | ||
|
||
guard _swift_stdlib_dyld_is_objc_constant_string( | ||
cocoaString as! UnsafeRawPointer | ||
) == 1 else { | ||
return nil | ||
} | ||
|
||
let payloadMask = ~tagMask | ||
let payload = taggedValue & payloadMask | ||
let ivarPointer = UnsafePointer<_swift_shims_builtin_CFString>( | ||
bitPattern: payload | ||
)! | ||
let length = ivarPointer.pointee.length | ||
let isUTF16Mask:UInt = 0x0000_0000_0000_0004 //CFStringFlags bit 4: isUnicode | ||
let isASCII = ivarPointer.pointee.flags & isUTF16Mask == 0 | ||
let contentsPtr = ivarPointer.pointee.str | ||
return ( | ||
utf16Length: Int(length), | ||
asciiContentsPointer: isASCII ? contentsPtr : nil | ||
) | ||
#endif | ||
} | ||
|
||
@usableFromInline | ||
@_effects(releasenone) // @opaque | ||
internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts { | ||
|
@@ -371,6 +472,16 @@ internal func _bridgeCocoaString(_ cocoaString: _CocoaString) -> _StringGuts { | |
#if !(arch(i386) || arch(arm)) | ||
case .tagged: | ||
return _StringGuts(_SmallString(taggedCocoa: cocoaString)) | ||
#if arch(arm64) | ||
case .constantTagged: | ||
let taggedContents = getConstantTaggedCocoaContents(cocoaString)! | ||
return _StringGuts( | ||
cocoa: cocoaString, | ||
providesFastUTF8: false, //TODO: if contentsPtr is UTF8 compatible, use it | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that the same as isASCII? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Today, yes. I want to change that in the future, but that requires clang and CF changes, and may never happen. |
||
isASCII: taggedContents.asciiContentsPointer != nil, | ||
length: taggedContents.utf16Length | ||
) | ||
#endif | ||
#endif | ||
case .cocoa: | ||
// "Copy" it into a value to be sure nobody will modify behind | ||
|
Uh oh!
There was an error while loading. Please reload this page.