Skip to content

Commit cb7dfb1

Browse files
committed
Added implementation of NSString init(contentsOfFile:usedEncoding:)
Reworked implementation of NSString init(contentsOf:usedEncoding:) to use same BOM detection code
1 parent 031a257 commit cb7dfb1

File tree

6 files changed

+242
-56
lines changed

6 files changed

+242
-56
lines changed

Docs/Status.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ There is no _Complete_ status for test coverage because there are always additio
206206
| `NSMutableCharacterSet` | Mostly Complete | None | Decoding remains unimplemented |
207207
| `NSCFCharacterSet` | N/A | N/A | For internal use only |
208208
| `CharacterSet` | Complete | Incomplete | |
209-
| `NSString` | Mostly Complete | Substantial | `init(contentsOf:usedEncoding:)`, `init(contentsOfFile:usedEncoding:)`, `enumerateSubstrings(in:options:using:)` remain unimplemented |
209+
| `NSString` | Mostly Complete | Substantial | `enumerateSubstrings(in:options:using:)` remains unimplemented |
210210
| `NSStringEncodings` | Complete | N/A | Contains definitions of string encodings |
211211
| `NSCFString` | N/A | N/A | For internal use only |
212212
| `NSStringAPI` | N/A | N/A | Exposes `NSString` APIs on `String` |

Foundation.xcodeproj/project.pbxproj

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@
312312
9F0DD3531ECD73D200F68030 /* XDGTestHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9F4ADBD21ECD506E001F0B3D /* XDGTestHelper.swift */; };
313313
9F0DD3571ECD783500F68030 /* SwiftFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5B5D885D1BBC938800234F36 /* SwiftFoundation.framework */; };
314314
A058C2021E529CF100B07AA1 /* TestMassFormatter.swift in Sources */ = {isa = PBXBuildFile; fileRef = A058C2011E529CF100B07AA1 /* TestMassFormatter.swift */; };
315+
A5EB58941EFC0B7C00D2651C /* NSString-UTF32-BE-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = A5EB58901EFC0B0200D2651C /* NSString-UTF32-BE-data.txt */; };
316+
A5EB58951EFC0B7C00D2651C /* NSString-UTF32-LE-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = A5EB58921EFC0B0200D2651C /* NSString-UTF32-LE-data.txt */; };
315317
AE35A1861CBAC85E0042DB84 /* SwiftFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = AE35A1851CBAC85E0042DB84 /* SwiftFoundation.h */; settings = {ATTRIBUTES = (Public, ); }; };
316318
B90C57BB1EEEEA5A005208AE /* TestFileManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 525AECEB1BF2C96400D15BB0 /* TestFileManager.swift */; };
317319
B90C57BC1EEEEA5A005208AE /* TestThread.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5E5835F31C20C9B500C81317 /* TestThread.swift */; };
@@ -772,6 +774,8 @@
772774
9F4ADBD21ECD506E001F0B3D /* XDGTestHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = XDGTestHelper.swift; path = ../XDGTestHelper.swift; sourceTree = "<group>"; };
773775
A058C2011E529CF100B07AA1 /* TestMassFormatter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestMassFormatter.swift; sourceTree = "<group>"; };
774776
A5A34B551C18C85D00FD972B /* TestNSByteCountFormatter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestNSByteCountFormatter.swift; sourceTree = "<group>"; };
777+
A5EB58901EFC0B0200D2651C /* NSString-UTF32-BE-data.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "NSString-UTF32-BE-data.txt"; sourceTree = "<group>"; };
778+
A5EB58921EFC0B0200D2651C /* NSString-UTF32-LE-data.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "NSString-UTF32-LE-data.txt"; sourceTree = "<group>"; };
775779
AE35A1851CBAC85E0042DB84 /* SwiftFoundation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SwiftFoundation.h; sourceTree = "<group>"; };
776780
B167A6641ED7303F0040B09A /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
777781
B91095781EEF237800A71930 /* NSString-UTF16-LE-data.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "NSString-UTF16-LE-data.txt"; sourceTree = "<group>"; };
@@ -1369,25 +1373,27 @@
13691373
EA66F6391BF1619600136161 /* Resources */ = {
13701374
isa = PBXGroup;
13711375
children = (
1372-
D370696D1C394FBF00295652 /* NSKeyedUnarchiver-RangeTest.plist */,
1373-
D3E8D6D41C36AC0C00295652 /* NSKeyedUnarchiver-RectTest.plist */,
1376+
EA66F6791BF9401E00136161 /* Info.plist */,
1377+
D3A597F51C3415CC00295652 /* NSKeyedUnarchiver-ArrayTest.plist */,
1378+
D3A597FB1C3417EA00295652 /* NSKeyedUnarchiver-ComplexTest.plist */,
1379+
D3A597FF1C341E9100295652 /* NSKeyedUnarchiver-ConcreteValueTest.plist */,
13741380
D3E8D6D21C36982700295652 /* NSKeyedUnarchiver-EdgeInsetsTest.plist */,
1381+
D3A597F31C34142600295652 /* NSKeyedUnarchiver-NotificationTest.plist */,
13751382
D3A598021C349E6A00295652 /* NSKeyedUnarchiver-OrderedSetTest.plist */,
1376-
D3A597FF1C341E9100295652 /* NSKeyedUnarchiver-ConcreteValueTest.plist */,
1377-
D3A597FB1C3417EA00295652 /* NSKeyedUnarchiver-ComplexTest.plist */,
1378-
D3A597F91C3415F000295652 /* NSKeyedUnarchiver-UUIDTest.plist */,
1379-
D3A597F51C3415CC00295652 /* NSKeyedUnarchiver-ArrayTest.plist */,
1383+
D370696D1C394FBF00295652 /* NSKeyedUnarchiver-RangeTest.plist */,
1384+
D3E8D6D41C36AC0C00295652 /* NSKeyedUnarchiver-RectTest.plist */,
13801385
D3A597F61C3415CC00295652 /* NSKeyedUnarchiver-URLTest.plist */,
1381-
D3A597F31C34142600295652 /* NSKeyedUnarchiver-NotificationTest.plist */,
1382-
EA66F6791BF9401E00136161 /* Info.plist */,
1383-
CE19A88B1C23AA2300B4CB6A /* NSStringTestData.txt */,
1384-
B91095781EEF237800A71930 /* NSString-UTF16-LE-data.txt */,
1386+
D3A597F91C3415F000295652 /* NSKeyedUnarchiver-UUIDTest.plist */,
13851387
B91095791EEF237800A71930 /* NSString-UTF16-BE-data.txt */,
1386-
528776181BF27D9500CB0090 /* Test.plist */,
1388+
B91095781EEF237800A71930 /* NSString-UTF16-LE-data.txt */,
1389+
A5EB58901EFC0B0200D2651C /* NSString-UTF32-BE-data.txt */,
1390+
A5EB58921EFC0B0200D2651C /* NSString-UTF32-LE-data.txt */,
1391+
CE19A88B1C23AA2300B4CB6A /* NSStringTestData.txt */,
13871392
EA66F63B1BF1619600136161 /* NSURLTestData.plist */,
13881393
E1A3726E1C31EBFB0023AF4D /* NSXMLDocumentTestData.xml */,
1389-
E1A03F351C4828650023AF4D /* PropertyList-1.0.dtd */,
13901394
E1A03F371C482C730023AF4D /* NSXMLDTDTestData.xml */,
1395+
E1A03F351C4828650023AF4D /* PropertyList-1.0.dtd */,
1396+
528776181BF27D9500CB0090 /* Test.plist */,
13911397
);
13921398
path = Resources;
13931399
sourceTree = "<group>";
@@ -2053,6 +2059,8 @@
20532059
isa = PBXResourcesBuildPhase;
20542060
buildActionMask = 2147483647;
20552061
files = (
2062+
A5EB58941EFC0B7C00D2651C /* NSString-UTF32-BE-data.txt in Resources */,
2063+
A5EB58951EFC0B7C00D2651C /* NSString-UTF32-LE-data.txt in Resources */,
20562064
D3A597F41C34142600295652 /* NSKeyedUnarchiver-NotificationTest.plist in Resources */,
20572065
528776191BF27D9500CB0090 /* Test.plist in Resources */,
20582066
EA66F6481BF1619600136161 /* NSURLTestData.plist in Resources */,

Foundation/NSString.swift

Lines changed: 115 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,38 +1297,137 @@ extension NSString {
12971297
try self.init(contentsOf: URL(fileURLWithPath: path), encoding: enc)
12981298
}
12991299

1300-
public convenience init(contentsOf url: URL, usedEncoding enc: UnsafeMutablePointer<UInt>?) throws {
1301-
let readResult = try NSData(contentsOf: url, options:[])
1302-
1303-
let bytePtr = readResult.bytes.bindMemory(to: UInt8.self, capacity:readResult.length)
1304-
if readResult.length >= 2 && bytePtr[0] == 254 && bytePtr[1] == 255 {
1305-
enc?.pointee = String.Encoding.utf16BigEndian.rawValue
1306-
}
1307-
else if readResult.length >= 2 && bytePtr[0] == 255 && bytePtr[1] == 254 {
1308-
enc?.pointee = String.Encoding.utf16LittleEndian.rawValue
1300+
/// Detects a Unicode encoding by looking for a Byte Order Mark (BOM) at the
/// start of `data`.
///
/// - Parameter data: The raw bytes to inspect. Only the leading bytes are read.
/// - Returns: The encoding whose BOM prefixes `data`, or `nil` when no known
///   BOM is present (including when `data` is shorter than every BOM).
private static func _getEncodingFromDataByCheckingForUnicodeBOM(_ data: Data) -> String.Encoding? {
    // Candidates are tried in order and the first match wins.
    // utf32LittleEndian must come before utf16LittleEndian because the
    // UTF-16 LE BOM (FF FE) is a prefix of the UTF-32 LE BOM (FF FE 00 00).
    let unicodeBOMs: [(String.Encoding, [UInt8])] = [
        (.utf8, [0xEF, 0xBB, 0xBF]),
        (.utf16BigEndian, [0xFE, 0xFF]),
        (.utf32LittleEndian, [0xFF, 0xFE, 0x00, 0x00]),
        (.utf16LittleEndian, [0xFF, 0xFE]),
        (.utf32BigEndian, [0x00, 0x00, 0xFE, 0xFF])
    ]

    for (bomEncoding, bom) in unicodeBOMs {
        // `starts(with:)` walks the data as a sequence, so it does not depend
        // on the data's indices starting at zero, it stops at the first
        // mismatching byte, and it is false when the data is shorter than the BOM.
        if data.starts(with: bom) {
            return bomEncoding
        }
    }
    return nil
}
1328+
1329+
/// Creates a `CFString` by decoding the bytes of `data` with `encoding`.
///
/// - Parameters:
///   - data: The raw bytes to decode.
///   - encoding: The encoding to interpret the bytes with; it is converted to
///     the matching `CFStringEncoding` before decoding.
/// - Returns: The decoded string, or `nil` when `CFStringCreateWithBytes`
///   cannot produce one.
private static func _createCFString(fromData data: Data, withEncoding encoding: String.Encoding) -> CFString? {
    let byteCount = data.count
    let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding.rawValue)

    return data.withUnsafeBytes { (rawBytes: UnsafePointer<UInt8>) -> CFString? in
        // The final `true` is the isExternalRepresentation flag of CFStringCreateWithBytes.
        return CFStringCreateWithBytes(kCFAllocatorDefault, rawBytes, byteCount, cfEncoding, true)
    }
}
1336+
1337+
/// Creates a string from the contents of `url`, detecting the text encoding.
///
/// File URLs are forwarded to `init(contentsOfFile:usedEncoding:)`. For other
/// URLs the encoding is taken from a leading Unicode BOM, defaulting to UTF-8
/// when none is found.
///
/// - Parameters:
///   - url: The location to read from.
///   - enc: If non-nil, receives the raw value of the encoding that was used.
///     It is written before decoding is attempted.
/// - Throws: Any error raised while reading the data, or an `NSError` in
///   `NSCocoaErrorDomain` with code `fileReadUnknownStringEncoding` when the
///   bytes cannot be turned into a string.
public convenience init(contentsOf url: URL, usedEncoding enc: UnsafeMutablePointer<UInt>?) throws {
    // Forward to file handling init, so extended attributes can be checked
    guard !url.isFileURL else {
        try self.init(contentsOfFile: url.path, usedEncoding: enc)
        return
    }

    let contents = try Data(contentsOf: url, options: [])

    // No BOM means the encoding is unknown; fall back to utf8.
    let detected = NSString._getEncodingFromDataByCheckingForUnicodeBOM(contents) ?? .utf8
    enc?.pointee = detected.rawValue

    guard let cfString = NSString._createCFString(fromData: contents, withEncoding: detected) else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to create a string using the specified encoding."
            ])
    }

    var bridged: String?
    guard String._conditionallyBridgeFromObjectiveC(cfString._nsObject, result: &bridged) else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to bridge CFString to String."
            ])
    }
    self.init(bridged!)
}
13291365

1366+
/// Maps an IANA character-set name (for example `"utf-8"`) to a `String.Encoding`.
///
/// The lookup walks the list returned by `CFStringGetListOfAvailableEncodings()`
/// and compares each encoding's IANA name with `encodingStr`. IANA charset
/// names are case-insensitive, so the comparison ignores case.
///
/// - Parameter encodingStr: The IANA charset name to look up.
/// - Returns: The matching encoding, or `nil` when no available encoding has
///   that name or the list of encodings is unavailable.
private static func _getEncodingNameFromString(_ encodingStr: String) -> String.Encoding? {
    // A missing list means nothing can match; avoid force-unwrapping it.
    guard var encodingPtr = CFStringGetListOfAvailableEncodings() else {
        return nil
    }
    let wantedName = encodingStr.lowercased()

    // The list is terminated by kCFStringEncodingInvalidId.
    while encodingPtr.pointee != kCFStringEncodingInvalidId {
        let cfEncoding = encodingPtr.pointee
        // Advance before any `continue` so every path makes progress; skipping
        // an entry without advancing would spin forever on that entry.
        encodingPtr = encodingPtr.advanced(by: 1)

        guard let cfEncodingName = CFStringConvertEncodingToIANACharSetName(cfEncoding) else {
            continue
        }

        var encodingName: String?
        guard String._conditionallyBridgeFromObjectiveC(cfEncodingName._nsObject, result: &encodingName),
            let candidate = encodingName,
            candidate.lowercased() == wantedName else {
            continue
        }

        let encoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding)
        return String.Encoding(rawValue: encoding)
    }

    return nil
}
1389+
13301390
/// Creates a string from the file at `path`, detecting the text encoding.
///
/// The encoding is chosen in this order:
/// 1. the `com.apple.TextEncoding` extended attribute of the file, if present
///    and it names a known encoding;
/// 2. a Unicode BOM at the start of the file;
/// 3. UTF-8 as the default.
///
/// - Parameters:
///   - path: The path of the file to read.
///   - enc: If non-nil, receives the raw value of the encoding that was used.
///     It is written before decoding is attempted.
/// - Throws: Any error raised while reading the file, or an `NSError` in
///   `NSCocoaErrorDomain` with code `fileReadUnknownStringEncoding` when the
///   bytes cannot be turned into a string.
public convenience init(contentsOfFile path: String, usedEncoding enc: UnsafeMutablePointer<UInt>?) throws {
    let readResult = try Data(contentsOf: URL(fileURLWithPath: path), options: [])

    // Check extended attributes for 'com.apple.TextEncoding'
    var attributeEncoding: String.Encoding?
    let attrName = "com.apple.TextEncoding"
    // First call with a nil buffer only asks for the size of the value.
    let bufCount = getxattr(path, attrName, nil, 0, 0, 0)
    if bufCount > 0 {
        var buf = [UInt8](repeating: 0, count: bufCount)
        // Use the length actually read; the attribute may have changed
        // between the size query and this call.
        let readCount = getxattr(path, attrName, &buf, bufCount, 0, 0)
        if readCount > 0 {
            // The value has the form "<IANA name>;<CFStringEncoding number>"
            // (e.g. "utf-8;134217984"); only the name part is used here, so
            // a value with an empty name falls through to BOM detection.
            let value = buf.prefix(readCount)
            let nameBytes = value.split(separator: UInt8(ascii: ";"), maxSplits: 1, omittingEmptySubsequences: false).first ?? value
            if let ianaName = String(bytes: nameBytes, encoding: .utf8) {
                attributeEncoding = NSString._getEncodingNameFromString(ianaName)
            }
        }
    }

    // If the encoding can't be found in extended attributes, check for a BOM;
    // if that fails too, use utf8 as the default.
    let encoding = attributeEncoding ?? NSString._getEncodingFromDataByCheckingForUnicodeBOM(readResult) ?? .utf8

    enc?.pointee = encoding.rawValue

    guard let cf = NSString._createCFString(fromData: readResult, withEncoding: encoding) else {
        throw NSError(domain: NSCocoaErrorDomain,
                      code: CocoaError.fileReadUnknownStringEncoding.rawValue,
                      userInfo: [
                        "NSDebugDescription" : "The file \"\(path)\" couldn't be opened because the text encoding of its contents can't be determined.",
                        "NSFilePath": path
            ])
    }

    var str: String?
    guard String._conditionallyBridgeFromObjectiveC(cf._nsObject, result: &str), let bridged = str else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to bridge CFString to String."
            ])
    }
    self.init(bridged)
}
13331432
}
13341433

244 Bytes
Binary file not shown.
244 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)