@@ -1297,38 +1297,137 @@ extension NSString {
1297
1297
try self . init ( contentsOf: URL ( fileURLWithPath: path) , encoding: enc)
1298
1298
}
1299
1299
1300
- public convenience init ( contentsOf url: URL , usedEncoding enc: UnsafeMutablePointer < UInt > ? ) throws {
1301
- let readResult = try NSData ( contentsOf: url, options: [ ] )
1302
-
1303
- let bytePtr = readResult. bytes. bindMemory ( to: UInt8 . self, capacity: readResult. length)
1304
- if readResult. length >= 2 && bytePtr [ 0 ] == 254 && bytePtr [ 1 ] == 255 {
1305
- enc? . pointee = String . Encoding. utf16BigEndian. rawValue
1306
- }
1307
- else if readResult. length >= 2 && bytePtr [ 0 ] == 255 && bytePtr [ 1 ] == 254 {
1308
- enc? . pointee = String . Encoding. utf16LittleEndian. rawValue
1300
/// Detects a Unicode encoding by looking for a byte order mark (BOM) at the start of `data`.
///
/// - Parameter data: The raw bytes to inspect. May be empty or a slice of a larger `Data`.
/// - Returns: The encoding whose BOM prefixes `data`, or `nil` when no known BOM is present.
private static func _getEncodingFromDataByCheckingForUnicodeBOM(_ data: Data) -> String.Encoding? {
    // Candidates are tried in order. utf32LittleEndian must come before
    // utf16LittleEndian because the UTF-16 LE BOM (FF FE) is a prefix of the
    // UTF-32 LE BOM (FF FE 00 00).
    let unicodeBOMs: [(String.Encoding, [UInt8])] = [
        (.utf8, [0xEF, 0xBB, 0xBF]),
        (.utf16BigEndian, [0xFE, 0xFF]),
        (.utf32LittleEndian, [0xFF, 0xFE, 0x00, 0x00]),
        (.utf16LittleEndian, [0xFF, 0xFE]),
        (.utf32BigEndian, [0x00, 0x00, 0xFE, 0xFF])
    ]

    // starts(with:) walks the data from its own startIndex, so it is correct for
    // slices whose indices do not begin at 0, and it reports false when the data
    // is shorter than the BOM.
    for (bomEncoding, bom) in unicodeBOMs where data.starts(with: bom) {
        return bomEncoding
    }
    return nil
}
1328
+
1329
/// Builds a `CFString` from raw bytes interpreted in the given encoding.
///
/// - Parameters:
///   - data: The bytes to decode.
///   - encoding: The encoding used to interpret `data`.
/// - Returns: The result of `CFStringCreateWithBytes`, which is `nil` when no string could be created.
private static func _createCFString(fromData data: Data, withEncoding encoding: String.Encoding) -> CFString? {
    let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding.rawValue)
    let byteCount = data.count

    // The final `true` marks the bytes as an external representation.
    return data.withUnsafeBytes { (bytes: UnsafePointer<UInt8>) -> CFString? in
        return CFStringCreateWithBytes(kCFAllocatorDefault, bytes, byteCount, cfEncoding, true)
    }
}
1336
+
1337
/// Creates a string from the contents of `url`, working out the encoding from the data.
///
/// File URLs are handed to `init(contentsOfFile:usedEncoding:)`. For any other URL
/// the encoding is taken from a leading Unicode byte order mark, defaulting to
/// UTF-8 when none is found.
///
/// - Parameters:
///   - url: The location to read.
///   - enc: If non-nil, receives the raw value of the encoding that was chosen.
///     On the non-file path it is written before decoding is attempted.
/// - Throws: Any error from reading `url`, or an `NSCocoaErrorDomain` error with
///   code `fileReadUnknownStringEncoding` when the string cannot be created or bridged.
public convenience init(contentsOf url: URL, usedEncoding enc: UnsafeMutablePointer<UInt>?) throws {
    // Forward to the file handling init, so extended attributes can be checked
    guard !url.isFileURL else {
        try self.init(contentsOfFile: url.path, usedEncoding: enc)
        return
    }

    let contents = try Data(contentsOf: url, options: [])

    // No BOM means the encoding is unknown; fall back to utf8
    let detected: String.Encoding
    if let bomEncoding = NSString._getEncodingFromDataByCheckingForUnicodeBOM(contents) {
        detected = bomEncoding
    } else {
        detected = .utf8
    }
    enc?.pointee = detected.rawValue

    guard let cfString = NSString._createCFString(fromData: contents, withEncoding: detected) else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to create a string using the specified encoding."
        ])
    }

    var bridged: String?
    guard String._conditionallyBridgeFromObjectiveC(cfString._nsObject, result: &bridged) else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to bridge CFString to String."
        ])
    }
    self.init(bridged!)
}
1329
1365
1366
/// Maps an IANA character set name (for example "utf-8") to a `String.Encoding`.
///
/// Every entry in the list from `CFStringGetListOfAvailableEncodings()` is checked
/// and its IANA name compared with `encodingStr`. The comparison ignores case
/// because IANA charset names are case-insensitive.
///
/// - Parameter encodingStr: The IANA charset name to look up.
/// - Returns: The first matching encoding, or `nil` when nothing matches or the
///   list of encodings is unavailable.
private static func _getEncodingNameFromString(_ encodingStr: String) -> String.Encoding? {
    // Without a list there is nothing to search.
    guard var encodingPtr = CFStringGetListOfAvailableEncodings() else {
        return nil
    }
    let wantedName = encodingStr.lowercased()

    // The list is walked until the kCFStringEncodingInvalidId sentinel.
    while encodingPtr.pointee != kCFStringEncodingInvalidId {
        let cfEncoding = encodingPtr.pointee
        // Advance before doing anything else so that every path through the
        // loop body makes progress, including the `continue` below.
        encodingPtr = encodingPtr.advanced(by: 1)

        guard let cfEncodingName = CFStringConvertEncodingToIANACharSetName(cfEncoding) else {
            continue
        }

        var encodingName: String?
        if String._conditionallyBridgeFromObjectiveC(cfEncodingName._nsObject, result: &encodingName),
            encodingName?.lowercased() == wantedName {
            return String.Encoding(rawValue: CFStringConvertEncodingToNSStringEncoding(cfEncoding))
        }
    }

    return nil
}
1389
+
1330
1390
/// Creates a string from the file at `path`, working out the text encoding.
///
/// The encoding is chosen from, in order:
/// 1. the file's `com.apple.TextEncoding` extended attribute,
/// 2. a Unicode byte order mark at the start of the contents,
/// 3. UTF-8 as the default.
///
/// - Parameters:
///   - path: Path of the file to read.
///   - enc: If non-nil, receives the raw value of the encoding that was chosen.
///     It is written before decoding is attempted.
/// - Throws: Any error from reading the file, or an `NSCocoaErrorDomain` error with
///   code `fileReadUnknownStringEncoding` when the string cannot be created or bridged.
public convenience init(contentsOfFile path: String, usedEncoding enc: UnsafeMutablePointer<UInt>?) throws {
    let readResult = try Data(contentsOf: URL(fileURLWithPath: path), options: [])

    // Check extended attributes for 'com.apple.TextEncoding'.
    // NOTE(review): this is the six-argument getxattr form (with position and
    // options); confirm it is available on every platform this file builds for.
    var attrEncoding: String.Encoding?
    let attrName = "com.apple.TextEncoding"
    let bufCount = getxattr(path, attrName, nil, 0, 0, 0)
    if bufCount > 0 {
        var buf = [UInt8](repeating: 0, count: bufCount)
        // Only the bytes actually written are decoded, in case the attribute
        // shrank between the size query and the read.
        let readCount = getxattr(path, attrName, &buf, bufCount, 0, 0)
        if readCount > 0, let attrValue = String(bytes: buf.prefix(readCount), encoding: .utf8) {
            // The attribute value has the form "IANA-name;CFStringEncoding",
            // e.g. "utf-8;134217984", so only the part before the first ';' is
            // an IANA name. The numeric part is not consulted here.
            if let ianaName = attrValue.components(separatedBy: ";").first, !ianaName.isEmpty {
                attrEncoding = NSString._getEncodingNameFromString(ianaName)
            }
        }
    }

    // If the encoding can't be found in extended attributes, check for a BOM,
    // and if that fails too, use utf8 as the default.
    let encoding: String.Encoding = attrEncoding
        ?? NSString._getEncodingFromDataByCheckingForUnicodeBOM(readResult)
        ?? String.Encoding.utf8

    enc?.pointee = encoding.rawValue

    guard let cf = NSString._createCFString(fromData: readResult, withEncoding: encoding) else {
        throw NSError(domain: NSCocoaErrorDomain,
                      code: CocoaError.fileReadUnknownStringEncoding.rawValue,
                      userInfo: [
                        "NSDebugDescription" : "The file \"\(path)\" couldn't be opened because the text encoding of its contents can't be determined.",
                        "NSFilePath" : path
            ])
    }

    var str: String?
    if String._conditionallyBridgeFromObjectiveC(cf._nsObject, result: &str) {
        self.init(str!)
    } else {
        throw NSError(domain: NSCocoaErrorDomain, code: CocoaError.fileReadUnknownStringEncoding.rawValue, userInfo: [
            "NSDebugDescription" : "Unable to bridge CFString to String."
        ])
    }
}
1333
1432
}
1334
1433
0 commit comments