Skip to content

Commit e8bc19c

Browse files
committed
Scanner: Implement scanning for hexadecimal floating point numbers.
1 parent 591789d commit e8bc19c

File tree

2 files changed

+107
-132
lines changed

2 files changed

+107
-132
lines changed

Foundation/Scanner.swift

Lines changed: 89 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
88
//
99

10-
import CoreFoundation
1110

1211
open class Scanner: NSObject, NSCopying {
1312
internal var _scanString: String
@@ -286,199 +285,145 @@ internal struct _NSStringBuffer {
286285
}
287286
}
288287

289-
private func isADigit(_ ch: unichar) -> Bool {
290-
struct Local {
291-
static let set = CharacterSet.decimalDigits
292-
}
293-
return Local.set.contains(UnicodeScalar(ch)!)
294-
}
295-
296288

297-
private func numericValue(_ ch: unichar) -> Int {
298-
if (ch >= unichar(unicodeScalarLiteral: "0") && ch <= unichar(unicodeScalarLiteral: "9")) {
299-
return Int(ch) - Int(unichar(unicodeScalarLiteral: "0"))
300-
} else {
301-
return __CFCharDigitValue(UniChar(ch))
302-
}
289+
private func decimalValue(_ ch: unichar) -> Int? {
290+
guard let s = UnicodeScalar(ch), s.isASCII else { return nil }
291+
return Character(s).wholeNumberValue
303292
}
304293

305-
private func numericOrHexValue(_ ch: unichar) -> Int {
306-
if (ch >= unichar(unicodeScalarLiteral: "0") && ch <= unichar(unicodeScalarLiteral: "9")) {
307-
return Int(ch) - Int(unichar(unicodeScalarLiteral: "0"))
308-
} else if (ch >= unichar(unicodeScalarLiteral: "A") && ch <= unichar(unicodeScalarLiteral: "F")) {
309-
return Int(ch) + 10 - Int(unichar(unicodeScalarLiteral: "A"))
310-
} else if (ch >= unichar(unicodeScalarLiteral: "a") && ch <= unichar(unicodeScalarLiteral: "f")) {
311-
return Int(ch) + 10 - Int(unichar(unicodeScalarLiteral: "a"))
312-
} else {
313-
return -1
314-
}
294+
private func decimalOrHexValue(_ ch: unichar) -> Int? {
295+
guard let s = UnicodeScalar(ch), s.isASCII else { return nil }
296+
return Character(s).hexDigitValue
315297
}
316298

317-
private func decimalSep(_ locale: Locale?) -> String {
318-
if let loc = locale {
319-
if let sep = loc._bridgeToObjectiveC().object(forKey: .decimalSeparator) as? NSString {
320-
return sep._swiftObject
321-
}
322-
return "."
323-
} else {
324-
return decimalSep(Locale.current)
325-
}
326-
}
327299

328300
extension String {
329-
internal func scan<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
330-
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
301+
302+
private func checkForNegative(inBuffer buf: inout _NSStringBuffer, skipping skipSet: CharacterSet? = nil) -> Bool {
331303
buf.skip(skipSet)
332-
var neg = false
333-
var localResult: T = 0
334304
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
335-
neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
305+
let neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
336306
buf.advance()
337307
buf.skip(skipSet)
308+
return neg
338309
}
339-
if (!isADigit(buf.currentCharacter)) {
340-
return false
341-
}
342-
repeat {
343-
let numeral = numericValue(buf.currentCharacter)
344-
if numeral == -1 {
345-
break
310+
return false
311+
}
312+
313+
// If the buffer starts with 0[xX]<valid hex digit>, return with the buffer pointing at the first hex digit; otherwise leave the buffer at the location it had on entry.
314+
private func skipHexStart(inBuffer buf: inout _NSStringBuffer) {
315+
let locRewindTo = buf.location
316+
if buf.currentCharacter == unichar(unicodeScalarLiteral: "0") {
317+
buf.advance()
318+
if buf.currentCharacter == unichar(unicodeScalarLiteral: "x") || buf.currentCharacter == unichar(unicodeScalarLiteral: "X") {
319+
buf.advance()
320+
if decimalOrHexValue(buf.currentCharacter) != nil {
321+
return
322+
}
346323
}
324+
buf.location = locRewindTo
325+
}
326+
}
327+
328+
internal func scan<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
329+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
330+
var localResult: T = 0
331+
var retval = false
332+
var neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
333+
334+
while let numeral = decimalValue(buf.currentCharacter) {
335+
retval = true
347336
if (localResult >= T.max / 10) && ((localResult > T.max / 10) || T(numeral - (neg ? 1 : 0)) >= T.max - localResult * 10) {
348337
// apply the clamps and advance past the ending of the buffer where there are still digits
349338
localResult = neg ? T.min : T.max
350339
neg = false
351340
repeat {
352341
buf.advance()
353-
} while (isADigit(buf.currentCharacter))
342+
} while decimalValue(buf.currentCharacter) != nil
354343
break
355344
} else {
356345
// normal case for scanning
357346
localResult = localResult * 10 + T(numeral)
358347
}
359348
buf.advance()
360-
} while (isADigit(buf.currentCharacter))
349+
}
361350
to(neg ? -1 * localResult : localResult)
362351
locationToScanFrom = buf.location
363-
return true
352+
return retval
364353
}
365-
354+
366355
internal func scanHex<T: FixedWidthInteger>(_ skipSet: CharacterSet?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
367356
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
368-
buf.skip(skipSet)
369357
var localResult: T = 0
370-
var curDigit: Int
371-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "0") {
372-
buf.advance()
373-
let locRewindTo = buf.location
374-
curDigit = numericOrHexValue(buf.currentCharacter)
375-
if curDigit == -1 {
376-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "x") || buf.currentCharacter == unichar(unicodeScalarLiteral: "X") {
377-
buf.advance()
378-
curDigit = numericOrHexValue(buf.currentCharacter)
379-
}
380-
}
381-
if curDigit == -1 {
382-
locationToScanFrom = locRewindTo
383-
to(T(0))
384-
return true
385-
}
386-
} else {
387-
curDigit = numericOrHexValue(buf.currentCharacter)
388-
if curDigit == -1 {
389-
return false
390-
}
391-
}
392-
393-
repeat {
358+
var retval = false
359+
buf.skip(skipSet)
360+
skipHexStart(inBuffer: &buf)
361+
362+
while let numeral = decimalOrHexValue(buf.currentCharacter) {
363+
retval = true
394364
if localResult > T.max >> T(4) {
395365
localResult = T.max
396366
} else {
397-
localResult = (localResult << T(4)) + T(curDigit)
367+
localResult = (localResult << T(4)) + T(numeral)
398368
}
399369
buf.advance()
400-
curDigit = numericOrHexValue(buf.currentCharacter)
401-
} while (curDigit != -1)
402-
370+
}
371+
403372
to(localResult)
404373
locationToScanFrom = buf.location
405-
return true
374+
return retval
406375
}
407376

408-
internal func scan<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
409-
let ds_chars = decimalSep(locale).utf16
410-
let ds = ds_chars[ds_chars.startIndex]
411-
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
412-
buf.skip(skipSet)
413-
var neg = false
377+
private func _scan<T: BinaryFloatingPoint>(buffer buf: inout _NSStringBuffer, locale: Locale?, neg: Bool, to: (T) -> Void, base: UInt,
378+
numericValue: ((_: unichar) -> Int?)) -> Bool {
379+
let ds = (locale ?? Locale.current).decimalSeparator?.first ?? Character(".")
414380
var localResult: T = T(0)
415-
416-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
417-
neg = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
418-
buf.advance()
419-
buf.skip(skipSet)
420-
}
421-
if (buf.currentCharacter != ds && !isADigit(buf.currentCharacter)) {
422-
return false
423-
}
424-
425-
repeat {
426-
let numeral = numericValue(buf.currentCharacter)
427-
if numeral == -1 {
428-
break
429-
}
381+
var neg = neg
382+
383+
while let numeral = numericValue(buf.currentCharacter) {
430384
// if (localResult >= T.greatestFiniteMagnitude / T(10)) && ((localResult > T.greatestFiniteMagnitude / T(10)) || T(numericValue(buf.currentCharacter) - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(10)) is evidently too complex; so break it down to more "edible chunks"
431-
let limit1 = localResult >= T.greatestFiniteMagnitude / T(10)
432-
let limit2 = localResult > T.greatestFiniteMagnitude / T(10)
433-
let limit3 = T(numeral - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(10)
385+
let limit1 = localResult >= T.greatestFiniteMagnitude / T(base)
386+
let limit2 = localResult > T.greatestFiniteMagnitude / T(base)
387+
let limit3 = T(numeral - (neg ? 1 : 0)) >= T.greatestFiniteMagnitude - localResult * T(base)
434388
if (limit1) && (limit2 || limit3) {
435389
// apply the clamps and advance past the ending of the buffer where there are still digits
436390
localResult = neg ? -T.infinity : T.infinity
437391
neg = false
438392
repeat {
439393
buf.advance()
440-
} while (isADigit(buf.currentCharacter))
394+
} while numericValue(buf.currentCharacter) != nil
441395
break
442396
} else {
443-
localResult = localResult * T(10) + T(numeral)
397+
localResult = localResult * T(base) + T(numeral)
444398
}
445399
buf.advance()
446-
} while (isADigit(buf.currentCharacter))
400+
}
447401

448-
if buf.currentCharacter == ds {
449-
var factor = T(0.1)
402+
if let us = UnicodeScalar(buf.currentCharacter), Character(us) == ds {
403+
var factor = 1 / T(base)
450404
buf.advance()
451-
repeat {
452-
let numeral = numericValue(buf.currentCharacter)
453-
if numeral == -1 {
454-
break
455-
}
405+
while let numeral = numericValue(buf.currentCharacter) {
456406
localResult = localResult + T(numeral) * factor
457-
factor = factor * T(0.1)
407+
factor = factor / T(base)
458408
buf.advance()
459-
} while (isADigit(buf.currentCharacter))
409+
}
460410
}
461411

412+
// If this is used to parse a number in Hexadecimal, this will never be true as the 'e' or 'E' will be caught by the previous loop.
462413
if buf.currentCharacter == unichar(unicodeScalarLiteral: "e") || buf.currentCharacter == unichar(unicodeScalarLiteral: "E") {
463414
var exponent = Double(0)
464-
var negExponent = false
415+
465416
buf.advance()
466-
if buf.currentCharacter == unichar(unicodeScalarLiteral: "-") || buf.currentCharacter == unichar(unicodeScalarLiteral: "+") {
467-
negExponent = buf.currentCharacter == unichar(unicodeScalarLiteral: "-")
468-
buf.advance()
469-
}
470-
repeat {
471-
let numeral = numericValue(buf.currentCharacter)
417+
let negExponent = checkForNegative(inBuffer: &buf)
418+
419+
while let numeral = numericValue(buf.currentCharacter) {
472420
buf.advance()
473-
if numeral == -1 {
474-
break
475-
}
476-
exponent *= 10
421+
exponent *= Double(base)
477422
exponent += Double(numeral)
478-
} while (isADigit(buf.currentCharacter))
423+
}
479424

480425
if exponent > 0 {
481-
let multiplier = pow(10, exponent)
426+
let multiplier = pow(Double(base), exponent)
482427
if negExponent {
483428
localResult /= T(multiplier)
484429
} else {
@@ -488,12 +433,24 @@ extension String {
488433
}
489434

490435
to(neg ? T(-1) * localResult : localResult)
491-
locationToScanFrom = buf.location
492436
return true
493437
}
494-
438+
439+
internal func scan<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
440+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
441+
let neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
442+
let result = _scan(buffer: &buf, locale: locale, neg: neg, to: to, base: 10, numericValue: decimalValue)
443+
locationToScanFrom = buf.location
444+
return result
445+
}
446+
495447
internal func scanHex<T: BinaryFloatingPoint>(_ skipSet: CharacterSet?, locale: Locale?, locationToScanFrom: inout Int, to: (T) -> Void) -> Bool {
496-
NSUnimplemented()
448+
var buf = _NSStringBuffer(string: self, start: locationToScanFrom, end: length)
449+
let neg = checkForNegative(inBuffer: &buf, skipping: skipSet)
450+
skipHexStart(inBuffer: &buf)
451+
let result = _scan(buffer: &buf, locale: locale, neg: neg, to: to, base: 16, numericValue: decimalOrHexValue)
452+
locationToScanFrom = buf.location
453+
return result
497454
}
498455
}
499456

TestFoundation/TestScanner.swift

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,23 @@ class TestScanner : XCTestCase {
111111
expectTrue($0.isAtEnd, "The X was not consumed")
112112
}
113113
}
114+
115+
func testHexFloatingPoint() {
116+
withScanner(for: "0xAA 3.14 0.1x 1g 3xx .F00x 1e00 -0xabcdef.02") {
117+
expectEqual($0.scanDouble(representation: .hexadecimal), 0xAA, "Integer as Double")
118+
expectEqual($0.scanDouble(representation: .hexadecimal), 3.078125, "Double")
119+
expectEqual($0.scanDouble(representation: .hexadecimal), 0.0625, "Double")
120+
expectEqual($0.scanString("x"), "x", "Consume non-hex-digit")
121+
expectEqual($0.scanDouble(representation: .hexadecimal), Double(1), "Double")
122+
expectEqual($0.scanString("g"), "g", "Consume non-hex-digit")
123+
expectEqual($0.scanDouble(representation: .hexadecimal), Double(3), "Double")
124+
expectEqual($0.scanString("xx"), "xx", "Consume non-hex-digits")
125+
expectEqual($0.scanDouble(representation: .hexadecimal), 0.9375, "Double")
126+
expectEqual($0.scanString("x"), "x", "Consume non-hex-digit")
127+
expectEqual($0.scanDouble(representation: .hexadecimal), 0x1E00, "E is not for exponent")
128+
expectEqual($0.scanDouble(representation: .hexadecimal), -11259375.0078125, "negative decimal")
129+
}
130+
}
114131

115132
func testUInt64() {
116133
// UInt64 long sequence:
@@ -472,6 +489,7 @@ class TestScanner : XCTestCase {
472489
return [
473490
("testScanFloatingPoint", testScanFloatingPoint),
474491
("testHexRepresentation", testHexRepresentation),
492+
("testHexFloatingPoint", testHexFloatingPoint),
475493
("testUInt64", testUInt64),
476494
("testInt64", testInt64),
477495
("testInt32", testInt32),

0 commit comments

Comments
 (0)