Skip to content

[SR-2225] String bridging and internal NUL (rebased) #604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions Foundation/NSString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1179,9 +1179,7 @@ extension NSString {
}

public convenience init?(data: Data, encoding: UInt) {
guard let cf = data.withUnsafeBytes({ (bytes: UnsafePointer<UInt8>) -> CFString? in
return CFStringCreateWithBytes(kCFAllocatorDefault, bytes, data.count, CFStringConvertNSStringEncodingToEncoding(encoding), true)
}) else { return nil }
guard let cf = CFStringCreateFromExternalRepresentation(kCFAllocatorDefault, data._cfObject, CFStringConvertNSStringEncodingToEncoding(encoding)) else { return nil }

var str: String?
if String._conditionallyBridgeFromObjectiveC(cf._nsObject, result: &str) {
Expand Down
21 changes: 13 additions & 8 deletions Foundation/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,20 @@ extension String : _ObjectTypeBridgeable {
result = source._storage
} else if type(of: source) == _NSCFString.self {
let cf = unsafeBitCast(source, to: CFString.self)
let str = CFStringGetCStringPtr(cf, CFStringEncoding(kCFStringEncodingUTF8))
if str != nil {
result = String(cString: str!)
let length = CFStringGetLength(cf) // This value is not always the length in characters, in spite of what the documentation says

// FIXME: If we had a reliable way to determine the length of `cf`
// in bytes, then we wouldn't have to allocate a new buffer
// if `CFStringGetCStringPtr(_:_:)` doesn't return nil.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The correct way to obtain the backing buffers is to expect nil and switch on either the 8 bit or 16 bit backing variants of CFString more like the previous code did, else you have a distinct chance that the CFStringGetBytes function will have to do a full walk of the unichars and convert each one.

This is roughly how a c implementation can be the fastest at converting to either a UTF8 or UTF16 buffer with an applier pseudo-lambda

void applyBuffer(CFStringRef str, void (*applier)(const void *buffer, CFStringEncoding)) {
    const char *cptr = CFStringGetCStringPtr(str, kCFStringEncodingUTF8);
    if (cptr != NULL) {
        applier(cptr, kCFStringEncodingUTF8);
    } else {
        const UniChar *uptr = CFStringGetCharactersPtr(str);
        if (uptr != NULL) {
            applier(uptr, kCFStringEncodingUTF16);
        } else {
            CFIndex length = CFStringGetLength(str);
            UniChar stack_characters[1024] = {0}; // adjust stack size according to taste
            UniChar *characters = &stack_characters[0];
            if (length > sizeof(stack_characters) / sizeof(stack_characters[0])) {
                characters = malloc(sizeof(UniChar) * length);
            }
            CFStringGetCharacters(str, CFRangeMake(0, length), characters);
            applier(characters, kCFStringEncodingUTF16);
            if (characters != &stack_characters[0]) {
                free(characters);
            }
        }
    }
}

CFString, NSString and struct String all have dual backing implementations that either are fast paths for 8 bit storage or 16 bit storage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat. I will study this code and implement accordingly. Currently traveling so can't promise quick turnaround.

if let buffer = CFStringGetCharactersPtr(cf) {
result = String._fromCodeUnitSequence(UTF16.self, input: UnsafeBufferPointer(start: buffer, count: length))
} else {
let length = CFStringGetLength(cf)
let buffer = UnsafeMutablePointer<UniChar>.allocate(capacity: length)
CFStringGetCharacters(cf, CFRangeMake(0, length), buffer)

let str = String._fromCodeUnitSequence(UTF16.self, input: UnsafeBufferPointer(start: buffer, count: length))
// Retrieving Unicode characters is unreliable; instead retrieve UTF8-encoded bytes
let max = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8)
let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: max)
var count: CFIndex = -1
CFStringGetBytes(cf, CFRangeMake(0, length), kCFStringEncodingUTF8, 0, false, buffer, max, &count)
let str = String._fromCodeUnitSequence(UTF8.self, input: UnsafeBufferPointer(start: buffer, count: count))
buffer.deinitialize(count: length)
buffer.deallocate(capacity: length)
result = str
Expand Down
8 changes: 4 additions & 4 deletions TestFoundation/TestNSJSONSerialization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ extension TestNSJSONSerialization {
("test_jsonObjectToOutputStreamBuffer", test_jsonObjectToOutputStreamBuffer),
("test_jsonObjectToOutputStreamFile", test_jsonObjectToOutputStreamFile),
("test_invalidJsonObjectToStreamBuffer", test_invalidJsonObjectToStreamBuffer),
("test_jsonObjectToOutputStreamInsufficeintBuffer", test_jsonObjectToOutputStreamInsufficeintBuffer),
("test_jsonObjectToOutputStreamInsufficientBuffer", test_jsonObjectToOutputStreamInsufficientBuffer),
("test_booleanJSONObject", test_booleanJSONObject),
]
}
Expand Down Expand Up @@ -868,7 +868,7 @@ extension TestNSJSONSerialization {
let result = try JSONSerialization.writeJSONObject(dict, toStream: outputStream, options: [])
outputStream.close()
if(result > -1) {
XCTAssertEqual(NSString(bytes: buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue), "{\"a\":{\"b\":1}}")
XCTAssertEqual(NSString(bytes: buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue), "{\"a\":{\"b\":1}}\0\0\0\0\0\0\0")
}
} catch {
XCTFail("Error thrown: \(error)")
Expand All @@ -892,7 +892,7 @@ extension TestNSJSONSerialization {
let resultRead: Int = fileStream.read(&buffer, maxLength: buffer.count)
fileStream.close()
if(resultRead > -1){
XCTAssertEqual(NSString(bytes: buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue), "{\"a\":{\"b\":1}}")
XCTAssertEqual(NSString(bytes: buffer, length: buffer.count, encoding: String.Encoding.utf8.rawValue), "{\"a\":{\"b\":1}}\0\0\0\0\0\0\0")
}
}
removeTestFile(filePath!)
Expand All @@ -905,7 +905,7 @@ extension TestNSJSONSerialization {
}
}

func test_jsonObjectToOutputStreamInsufficeintBuffer() {
func test_jsonObjectToOutputStreamInsufficientBuffer() {
let dict = ["a":["b":1]]
let buffer = Array<UInt8>(repeating: 0, count: 10)
let outputStream = OutputStream(toBuffer: UnsafeMutablePointer(mutating: buffer), capacity: buffer.count)
Expand Down
6 changes: 3 additions & 3 deletions TestFoundation/TestNSStream.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class TestNSStream : XCTestCase {
XCTAssertEqual(Stream.Status.notOpen, dataStream.streamStatus)
dataStream.open()
XCTAssertEqual(Stream.Status.open, dataStream.streamStatus)
var buffer = [UInt8](repeating: 0, count: 20)
var buffer = [UInt8](repeating: 0, count: 17)
if dataStream.hasBytesAvailable {
let result: Int = dataStream.read(&buffer, maxLength: buffer.count)
dataStream.close()
Expand All @@ -62,7 +62,7 @@ class TestNSStream : XCTestCase {
XCTAssertEqual(Stream.Status.notOpen, urlStream.streamStatus)
urlStream.open()
XCTAssertEqual(Stream.Status.open, urlStream.streamStatus)
var buffer = [UInt8](repeating: 0, count: 20)
var buffer = [UInt8](repeating: 0, count: 17)
if urlStream.hasBytesAvailable {
let result :Int = urlStream.read(&buffer, maxLength: buffer.count)
urlStream.close()
Expand All @@ -89,7 +89,7 @@ class TestNSStream : XCTestCase {
XCTAssertEqual(Stream.Status.notOpen, fileStream.streamStatus)
fileStream.open()
XCTAssertEqual(Stream.Status.open, fileStream.streamStatus)
var buffer = [UInt8](repeating: 0, count: 20)
var buffer = [UInt8](repeating: 0, count: 17)
if fileStream.hasBytesAvailable {
let result: Int = fileStream.read(&buffer, maxLength: buffer.count)
fileStream.close()
Expand Down
11 changes: 10 additions & 1 deletion TestFoundation/TestNSString.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class TestNSString : XCTestCase {
("test_FromNullTerminatedCStringInUTF8", test_FromNullTerminatedCStringInUTF8 ),
// Swift3 updates broke the expectations of this test. disabling for now
// ("test_FromMalformedNullTerminatedCStringInUTF8", test_FromMalformedNullTerminatedCStringInUTF8 ),
("test_FromDataWithInternalNullCharacter", test_FromDataWithInternalNullCharacter ),
("test_uppercaseString", test_uppercaseString ),
("test_lowercaseString", test_lowercaseString ),
("test_capitalizedString", test_capitalizedString ),
Expand All @@ -69,7 +70,7 @@ class TestNSString : XCTestCase {
("test_FromContentOfFile",test_FromContentOfFile),
("test_swiftStringUTF16", test_swiftStringUTF16),
// This test takes forever on build servers; it has been seen up to 1852.084 seconds
// ("test_completePathIntoString", test_completePathIntoString),
// ("test_completePathIntoString", test_completePathIntoString),
("test_stringByTrimmingCharactersInSet", test_stringByTrimmingCharactersInSet),
("test_initializeWithFormat", test_initializeWithFormat),
("test_initializeWithFormat2", test_initializeWithFormat2),
Expand Down Expand Up @@ -272,6 +273,14 @@ class TestNSString : XCTestCase {
XCTAssertNil(string)
}

func test_FromDataWithInternalNullCharacter() {
let bytes = mockASCIIStringBytes + [0x00] + mockASCIIStringBytes
let length = mockASCIIStringBytes.count * 2 + 1
let data = Data(bytes: bytes, count: length)
let string = NSString(data: data, encoding: String.Encoding.ascii.rawValue)
XCTAssertTrue(string?.isEqual(to: mockASCIIString + "\0" + mockASCIIString) ?? false)
}

func test_FromContentsOfURL() {
guard let testFileURL = testBundle().url(forResource: "NSStringTestData", withExtension: "txt") else {
XCTFail("URL for NSStringTestData.txt is nil")
Expand Down