Skip to content

Commit 4fb3985

Browse files
authored
Merge pull request #2111 from spevans/pr_scanhexfloat
Scanner: Implement scanning for hexadecimal floating-point numbers.
2 parents bd23c5a + e8bc19c commit 4fb3985

File tree

2 files changed

+107
-132
lines changed

2 files changed

+107
-132
lines changed

Foundation/Scanner.swift

Lines changed: 89 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
88
//
99

10-
import CoreFoundation
1110

1211
open class Scanner: NSObject, NSCopying {
1312
internal var _scanString: String
@@ -290,199 +289,145 @@ internal struct _NSStringBuffer {
290289
}
291290
}
292291

293-
private func isADigit(_ ch: unichar) -> Bool {
294-
struct Local {
295-
static let set = CharacterSet.decimalDigits
296-
}
297-
return Local.set.contains(UnicodeScalar(ch)!)
298-
}
299-
300292

301-
private func numericValue(_ ch: unichar) -> Int {
302-
if (ch >= unichar(unicodeScalarLiteral: "0") && ch <= unichar(unicodeScalarLiteral: "9")) {
303-
return Int(ch) - Int(unichar(unicodeScalarLiteral: "0"))
304-
} else {
305-
return __CFCharDigitValue(UniChar(ch))
306-
}
293+
private func decimalValue(_ ch: unichar) -> Int? {
294+
guard let s = UnicodeScalar(ch), s.isASCII else { return nil }
295+
return Character(s).wholeNumberValue
307296
}
308297

309-
private func numericOrHexValue(_ ch: unichar) -> Int {
310-
if (ch >= unichar(unicodeScalarLiteral: "0") && ch <= unichar(unicodeScalarLiteral: "9")) {
311-
return Int(ch) - Int(unichar(unicodeScalarLiteral: "0"))
312-
} else if (ch >= unichar(unicodeScalarLiteral: "A") && ch <= unichar(unicodeScalarLiteral: "F")) {
313-
return Int(ch) + 10 - Int(unichar(unicodeScalarLiteral: "A"))
314-
} else if (ch >= unichar(unicodeScalarLiteral: "a") && ch <= unichar(unicodeScalarLiteral: "f")) {
315-
return Int(ch) + 10 - Int(unichar(unicodeScalarLiteral: "a"))
316-
} else {
317-
return -1
318-
}
298+
private func decimalOrHexValue(_ ch: unichar) -> Int? {
299+
guard let s = UnicodeScalar(ch), s.isASCII else { return nil }
300+
return Character(s).hexDigitValue
319301
}
320302

321-
private func decimalSep(_ locale: Locale?) -> String {
322-
if let loc = locale {
323-
if let sep = loc._bridgeToObjectiveC().object(forKey: .decimalSeparator) as? NSString {
324-
return sep._swiftObject
325-
}
326-
return "."
327-
} else {
328-
return decimalSep(Locale.current)
329-
}
330-
}
331303

332304
extension String {
333-
internal func scan<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
334-
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
305+
306+
private func checkForNegative(inBuffer buf: inout _NSStringBuffer, skipping skipSet: CharacterSet? = nil) -> Bool {
335307
buf.skip(skipSet)
336-
var neg = false
337-
var localResult: T = 0
338308
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
339-
neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
309+
let neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
340310
buf.advance()
341311
buf.skip(skipSet)
312+
return neg
342313
}
343-
if (!isADigit(buf.currentCharacter)) {
344-
return false
345-
}
346-
repeat {
347-
let numeral = numericValue(buf.currentCharacter)
348-
if numeral == -1 {
349-
break
314+
return false
315+
}
316+
317+
// If the buffer starts with 0x or 0X followed by a valid hex digit, advance so the buffer points at that first hex digit; otherwise leave the buffer at its original location.
318+
private func skipHexStart(inBuffer buf: inout _NSStringBuffer) {
319+
let locRewindTo = buf.location
320+
if buf.currentCharacter == unichar(unicodeScalarLiteral: "0") {
321+
buf.advance()
322+
if buf.currentCharacter == unichar(unicodeScalarLiteral: "x") || buf.currentCharacter == unichar(unicodeScalarLiteral: "X") {
323+
buf.advance()
324+
if decimalOrHexValue(buf.currentCharacter) != nil {
325+
return
326+
}
350327
}
328+
buf.location = locRewindTo
329+
}
330+
}
331+
332+
internal func scan<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
333+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
334+
var localResult: T = 0
335+
var retval = false
336+
var neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
337+
338+
while let numeral = decimalValue(buf.currentCharacter) {
339+
retval = true
351340
if (localResult >= T.max / 10) && ((localResult > T.max / 10) || T(numeral - (neg ? 1 : 0)) >= T.max - localResult * 10) {
352341
// apply the clamps and advance past the ending of the buffer where there are still digits
353342
localResult = neg ? T.min : T.max
354343
neg = false
355344
repeat {
356345
buf.advance()
357-
} while (isADigit(buf.currentCharacter))
346+
} while decimalValue(buf.currentCharacter) != nil
358347
break
359348
} else {
360349
// normal case for scanning
361350
localResult = localResult * 10 + T(numeral)
362351
}
363352
buf.advance()
364-
} while (isADigit(buf.currentCharacter))
353+
}
365354
to(neg ? -1 * localResult : localResult)
366355
locationToScanFrom = buf.location
367-
return true
356+
return retval
368357
}
369-
358+
370359
internal func scanHex<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
371360
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
372-
buf.skip(skipSet)
373361
var localResult: T = 0
374-
var curDigit: Int
375-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "0") {
376-
buf.advance()
377-
let locRewindTo = buf.location
378-
curDigit = numericOrHexValue(buf.currentCharacter)
379-
if curDigit == -1 {
380-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "x") || buf.currentCharacter == unichar(unicodeScalarLiteral: "X") {
381-
buf.advance()
382-
curDigit = numericOrHexValue(buf.currentCharacter)
383-
}
384-
}
385-
if curDigit == -1 {
386-
locationToScanFrom = locRewindTo
387-
to(T(0))
388-
return true
389-
}
390-
} else {
391-
curDigit = numericOrHexValue(buf.currentCharacter)
392-
if curDigit == -1 {
393-
return false
394-
}
395-
}
396-
397-
repeat {
362+
var retval = false
363+
buf.skip(skipSet)
364+
skipHexStart(inBuffer: &buf)
365+
366+
while let numeral = decimalOrHexValue(buf.currentCharacter) {
367+
retval = true
398368
if localResult > T.max >> T(4) {
399369
localResult = T.max
400370
} else {
401-
localResult = (localResult << T(4)) + T(curDigit)
371+
localResult = (localResult << T(4)) + T(numeral)
402372
}
403373
buf.advance()
404-
curDigit = numericOrHexValue(buf.currentCharacter)
405-
} while (curDigit != -1)
406-
374+
}
375+
407376
to(localResult)
408377
locationToScanFrom = buf.location
409-
return true
378+
return retval
410379
}
411380

412-
internal func scan<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
413-
let ds_chars = decimalSep(locale).utf16
414-
let ds = ds_chars[ds_chars.startIndex]
415-
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
416-
buf.skip(skipSet)
417-
var neg = false
381+
private func _scan<T: BinaryFloatingPoint>(buffer buf: inout _NSStringBuffer, locale: Locale?, neg: Bool, to: (T) -> Void, base: UInt,
382+
numericValue: ((_: unichar) -> Int?)) -> Bool {
383+
let ds = (locale ?? Locale.current).decimalSeparator?.first ?? Character(".")
418384
var localResult: T = T(0)
419-
420-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
421-
neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
422-
buf.advance()
423-
buf.skip(skipSet)
424-
}
425-
if (buf.currentCharacter != ds && !isADigit(buf.currentCharacter)) {
426-
return false
427-
}
428-
429-
repeat {
430-
let numeral = numericValue(buf.currentCharacter)
431-
if numeral == -1 {
432-
break
433-
}
385+
var neg = neg
386+
387+
while let numeral = numericValue(buf.currentCharacter) {
434388
// if (localResult >= T.greatestFiniteMagnitude / T(10)) && ((localResult > T.greatestFiniteMagnitude / T(10)) || T(numericValue(buf.currentCharacter) - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(10)) is evidently too complex; so break it down to more "edible chunks"
435-
let limit1 = localResult >= T.greatestFiniteMagnitude / T(10)
436-
let limit2 = localResult > T.greatestFiniteMagnitude / T(10)
437-
let limit3 = T(numeral - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(10)
389+
let limit1 = localResult >= T.greatestFiniteMagnitude / T(base)
390+
let limit2 = localResult > T.greatestFiniteMagnitude / T(base)
391+
let limit3 = T(numeral - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(base)
438392
if (limit1) && (limit2 || limit3) {
439393
// apply the clamps and advance past the ending of the buffer where there are still digits
440394
localResult = neg ? -T.infinity : T.infinity
441395
neg = false
442396
repeat {
443397
buf.advance()
444-
} while (isADigit(buf.currentCharacter))
398+
} while numericValue(buf.currentCharacter) != nil
445399
break
446400
} else {
447-
localResult = localResult * T(10) + T(numeral)
401+
localResult = localResult * T(base) + T(numeral)
448402
}
449403
buf.advance()
450-
} while (isADigit(buf.currentCharacter))
404+
}
451405

452-
if buf.currentCharacter == ds {
453-
var factor = T(0.1)
406+
if let us = UnicodeScalar(buf.currentCharacter), Character(us) == ds {
407+
var factor = 1 / T(base)
454408
buf.advance()
455-
repeat {
456-
let numeral = numericValue(buf.currentCharacter)
457-
if numeral == -1 {
458-
break
459-
}
409+
while let numeral = numericValue(buf.currentCharacter) {
460410
localResult = localResult + T(numeral) * factor
461-
factor = factor * T(0.1)
411+
factor = factor / T(base)
462412
buf.advance()
463-
} while (isADigit(buf.currentCharacter))
413+
}
464414
}
465415

416+
// When this is used to parse a hexadecimal number, this branch is never taken: 'e' and 'E' are valid hex digits, so they are consumed by the digit loops above.
466417
if buf.currentCharacter == unichar(unicodeScalarLiteral: "e") || buf.currentCharacter == unichar(unicodeScalarLiteral: "E") {
467418
var exponent = Double(0)
468-
var negExponent = false
419+
469420
buf.advance()
470-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
471-
negExponent = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
472-
buf.advance()
473-
}
474-
repeat {
475-
let numeral = numericValue(buf.currentCharacter)
421+
let negExponent = checkForNegative(inBuffer: &buf)
422+
423+
while let numeral = numericValue(buf.currentCharacter) {
476424
buf.advance()
477-
if numeral == -1 {
478-
break
479-
}
480-
exponent *= 10
425+
exponent *= Double(base)
481426
exponent += Double(numeral)
482-
} while (isADigit(buf.currentCharacter))
427+
}
483428

484429
if exponent > 0 {
485-
let multiplier = pow(10, exponent)
430+
let multiplier = pow(Double(base), exponent)
486431
if negExponent {
487432
localResult /= T(multiplier)
488433
} else {
@@ -492,12 +437,24 @@ extension String {
492437
}
493438

494439
to(neg ? T(-1) * localResult : localResult)
495-
locationToScanFrom = buf.location
496440
return true
497441
}
498-
442+
443+
internal func scan<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
444+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
445+
let neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
446+
let result = _scan(buffer: &buf, locale: locale, neg: neg, to: to, base: 10, numericValue: decimalValue)
447+
locationToScanFrom = buf.location
448+
return result
449+
}
450+
499451
internal func scanHex<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
500-
NSUnimplemented()
452+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
453+
let neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
454+
skipHexStart(inBuffer: &buf)
455+
let result = _scan(buffer: &buf, locale: locale, neg: neg, to: to, base: 16, numericValue: decimalOrHexValue)
456+
locationToScanFrom = buf.location
457+
return result
501458
}
502459
}
503460

TestFoundation/TestScanner.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,23 @@ class TestScanner : XCTestCase {
111111
expectTrue($0.isAtEnd, "The X was not consumed")
112112
}
113113
}
114+
115+
func testHexFloatingPoint() {
116+
withScanner(for: "0xAA 3.14 0.1x 1g 3xx .F00x 1e00 -0xabcdef.02") {
117+
expectEqual($0.scanDouble(representation: .hexadecimal), 0xAA, "Integer as Double")
118+
expectEqual($0.scanDouble(representation: .hexadecimal), 3.078125, "Double")
119+
expectEqual($0.scanDouble(representation: .hexadecimal), 0.0625, "Double")
120+
expectEqual($0.scanString("x"), "x", "Consume non-hex-digit")
121+
expectEqual($0.scanDouble(representation: .hexadecimal), Double(1), "Double")
122+
expectEqual($0.scanString("g"), "g", "Consume non-hex-digit")
123+
expectEqual($0.scanDouble(representation: .hexadecimal), Double(3), "Double")
124+
expectEqual($0.scanString("xx"), "xx", "Consume non-hex-digits")
125+
expectEqual($0.scanDouble(representation: .hexadecimal), 0.9375, "Double")
126+
expectEqual($0.scanString("x"), "x", "Consume non-hex-digit")
127+
expectEqual($0.scanDouble(representation: .hexadecimal), 0x1E00, "E is not for exponent")
128+
expectEqual($0.scanDouble(representation: .hexadecimal), -11259375.0078125, "negative decimal")
129+
}
130+
}
114131

115132
func testUInt64() {
116133
// UInt64 long sequence:
@@ -485,6 +502,7 @@ class TestScanner : XCTestCase {
485502
return [
486503
("testScanFloatingPoint", testScanFloatingPoint),
487504
("testHexRepresentation", testHexRepresentation),
505+
("testHexFloatingPoint", testHexFloatingPoint),
488506
("testUInt64", testUInt64),
489507
("testInt64", testInt64),
490508
("testInt32", testInt32),

0 commit comments

Comments
 (0)