Skip to content

Commit ad992f4

Browse files
committed
[stdlib][SR-7556] Re-implement string-to-integer parsing.
1 parent 2f6fb2d commit ad992f4

File tree

1 file changed

+228
-94
lines changed

1 file changed

+228
-94
lines changed

stdlib/public/core/IntegerParsing.swift

Lines changed: 228 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,243 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2021 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
99
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// N.B.: This free function has been manually specialized below where
14+
// `UTF8CodeUnits == UnsafeBufferPointer<UInt8>`. Ensure that any changes are
15+
// made in sync.
16+
/// Accumulates a run of ASCII digits into a fixed-width integer.
///
/// Each code unit is mapped to a digit value ("0"-"9", and for radices above
/// 10 also "A"... / "a"... up to the radix). The running value is multiplied
/// by `radix`, and the digit is then added, or subtracted when `isNegative`
/// is true. No sign character is accepted here.
///
/// - Parameters:
///   - codeUnits: The UTF-8 code units to interpret as digits.
///   - radix: The base; an internal invariant checks that it is in `2...36`.
///   - isNegative: Whether the value is built by subtracting each digit
///     instead of adding it.
/// - Returns: The accumulated value, or `nil` if a code unit is not a digit
///   in `radix` or if a multiplication, addition, or subtraction reports
///   overflow. An empty `codeUnits` yields zero.
@_alwaysEmitIntoClient
internal func _parseASCIIDigits<
  UTF8CodeUnits: Sequence, Result: FixedWidthInteger
>(
  _ codeUnits: UTF8CodeUnits, radix: Int, isNegative: Bool
) -> Result? where UTF8CodeUnits.Element == UInt8 {
  _internalInvariant(radix >= 2 && radix <= 36)
  let base = Result(radix)
  var accumulator = 0 as Result
  if radix > 10 {
    // Number of letters that are valid digits in this radix.
    let letterSpan = UInt8(radix &- 10)
    let uppercaseLimit = 65 /* "A" */ &+ letterSpan
    let lowercaseLimit = 97 /* "a" */ &+ letterSpan
    for codeUnit in codeUnits {
      let digitValue: Result
      if _fastPath(codeUnit >= 48 /* "0" */ && codeUnit < 58) {
        digitValue = Result(codeUnit &- 48)
      } else if _fastPath(codeUnit >= 65 && codeUnit < uppercaseLimit) {
        digitValue = Result(codeUnit &- 65 &+ 10)
      } else if _fastPath(codeUnit >= 97 && codeUnit < lowercaseLimit) {
        digitValue = Result(codeUnit &- 97 &+ 10)
      } else {
        return nil
      }
      let scaled = accumulator.multipliedReportingOverflow(by: base)
      guard _fastPath(!scaled.overflow) else { return nil }
      // Negative values are accumulated downward; presumably this is what
      // lets the most negative representable value parse without overflow.
      let combined = isNegative
        ? scaled.partialValue.subtractingReportingOverflow(digitValue)
        : scaled.partialValue.addingReportingOverflow(digitValue)
      guard _fastPath(!combined.overflow) else { return nil }
      accumulator = combined.partialValue
    }
  } else {
    // Only "0" up to (but excluding) "0" + radix are valid digits.
    let digitLimit = 48 /* "0" */ &+ UInt8(radix)
    for codeUnit in codeUnits {
      guard _fastPath(codeUnit >= 48 && codeUnit < digitLimit) else {
        return nil
      }
      let digitValue = Result(codeUnit &- 48)
      let scaled = accumulator.multipliedReportingOverflow(by: base)
      guard _fastPath(!scaled.overflow) else { return nil }
      let combined = isNegative
        ? scaled.partialValue.subtractingReportingOverflow(digitValue)
        : scaled.partialValue.addingReportingOverflow(digitValue)
      guard _fastPath(!combined.overflow) else { return nil }
      accumulator = combined.partialValue
    }
  }
  return accumulator
}
69+
70+
// N.B.: This free function is a manually specialized version of the function
71+
// above. Ensure that any changes are made in sync.
72+
/// Accumulates a run of ASCII digits from a contiguous buffer into a
/// fixed-width integer.
///
/// Each byte is mapped to a digit value ("0"-"9", and for radices above 10
/// also "A"... / "a"... up to the radix). The running value is multiplied by
/// `radix`, and the digit is then added, or subtracted when `isNegative` is
/// true. No sign character is accepted here.
///
/// - Parameters:
///   - codeUnits: The UTF-8 bytes to interpret as digits.
///   - radix: The base; an internal invariant checks that it is in `2...36`.
///   - isNegative: Whether the value is built by subtracting each digit
///     instead of adding it.
/// - Returns: The accumulated value, or `nil` if a byte is not a digit in
///   `radix` or if a multiplication, addition, or subtraction reports
///   overflow. An empty buffer yields zero.
@_alwaysEmitIntoClient
internal func _parseASCIIDigits<Result: FixedWidthInteger>(
  _ codeUnits: UnsafeBufferPointer<UInt8>, radix: Int, isNegative: Bool
) -> Result? {
  _internalInvariant(radix >= 2 && radix <= 36)
  let base = Result(radix)
  var accumulator = 0 as Result
  if radix > 10 {
    // Number of letters that are valid digits in this radix.
    let letterSpan = UInt8(radix &- 10)
    let uppercaseLimit = 65 /* "A" */ &+ letterSpan
    let lowercaseLimit = 97 /* "a" */ &+ letterSpan
    for codeUnit in codeUnits {
      let digitValue: Result
      if _fastPath(codeUnit >= 48 /* "0" */ && codeUnit < 58) {
        digitValue = Result(codeUnit &- 48)
      } else if _fastPath(codeUnit >= 65 && codeUnit < uppercaseLimit) {
        digitValue = Result(codeUnit &- 65 &+ 10)
      } else if _fastPath(codeUnit >= 97 && codeUnit < lowercaseLimit) {
        digitValue = Result(codeUnit &- 97 &+ 10)
      } else {
        return nil
      }
      let scaled = accumulator.multipliedReportingOverflow(by: base)
      guard _fastPath(!scaled.overflow) else { return nil }
      // Negative values are accumulated downward; presumably this is what
      // lets the most negative representable value parse without overflow.
      let combined = isNegative
        ? scaled.partialValue.subtractingReportingOverflow(digitValue)
        : scaled.partialValue.addingReportingOverflow(digitValue)
      guard _fastPath(!combined.overflow) else { return nil }
      accumulator = combined.partialValue
    }
  } else {
    // Only "0" up to (but excluding) "0" + radix are valid digits.
    let digitLimit = 48 /* "0" */ &+ UInt8(radix)
    for codeUnit in codeUnits {
      guard _fastPath(codeUnit >= 48 && codeUnit < digitLimit) else {
        return nil
      }
      let digitValue = Result(codeUnit &- 48)
      let scaled = accumulator.multipliedReportingOverflow(by: base)
      guard _fastPath(!scaled.overflow) else { return nil }
      let combined = isNegative
        ? scaled.partialValue.subtractingReportingOverflow(digitValue)
        : scaled.partialValue.addingReportingOverflow(digitValue)
      guard _fastPath(!combined.overflow) else { return nil }
      accumulator = combined.partialValue
    }
  }
  return accumulator
}
123+
124+
/// Parses an optionally signed ASCII integer from a nonempty collection of
/// UTF-8 code units.
///
/// A single leading "-" (45) or "+" (43) is consumed as the sign; the
/// remaining code units are handed to `_parseASCIIDigits`.
///
/// - Parameters:
///   - codeUnits: The UTF-8 code units to parse. An internal invariant checks
///     that the collection is not empty.
///   - radix: The base, passed through to `_parseASCIIDigits`.
/// - Returns: The parsed value, or `nil` if a sign is not followed by at
///   least one code unit or if `_parseASCIIDigits` returns `nil`.
@_alwaysEmitIntoClient
internal func _parseASCII<UTF8CodeUnits: Collection, Result: FixedWidthInteger>(
  _ codeUnits: UTF8CodeUnits, radix: Int
) -> Result? where UTF8CodeUnits.Element == UInt8 {
  _internalInvariant(!codeUnits.isEmpty)
  let first = codeUnits.first!
  let isNegative = first == 45 /* "-" */
  guard isNegative || first == 43 /* "+" */ else {
    // No sign: the whole collection is digits.
    return _parseASCIIDigits(codeUnits, radix: radix, isNegative: false)
  }
  // A sign with nothing after it is not a number. Without this check the
  // digit loop in `_parseASCIIDigits` never runs and a lone "-" or "+" would
  // be parsed as zero.
  let digits = codeUnits.dropFirst()
  guard _fastPath(!digits.isEmpty) else { return nil }
  return _parseASCIIDigits(digits, radix: radix, isNegative: isNegative)
}
140+
141+
/// Parses an optionally signed ASCII integer from a nonempty contiguous
/// buffer of UTF-8 bytes.
///
/// A single leading "-" (45) or "+" (43) is consumed as the sign; the
/// remaining bytes are rebased into a new buffer and handed to
/// `_parseASCIIDigits`.
///
/// - Parameters:
///   - codeUnits: The UTF-8 bytes to parse. An internal invariant checks that
///     the buffer is not empty.
///   - radix: The base, passed through to `_parseASCIIDigits`.
/// - Returns: The parsed value, or `nil` if a sign is not followed by at
///   least one byte or if `_parseASCIIDigits` returns `nil`.
@_alwaysEmitIntoClient
internal func _parseASCII<Result: FixedWidthInteger>(
  _ codeUnits: UnsafeBufferPointer<UInt8>, radix: Int
) -> Result? {
  _internalInvariant(!codeUnits.isEmpty)
  let first = codeUnits[0]
  let isNegative = first == 45 /* "-" */
  guard isNegative || first == 43 /* "+" */ else {
    // No sign: the whole buffer is digits.
    return _parseASCIIDigits(codeUnits, radix: radix, isNegative: false)
  }
  // A sign with nothing after it is not a number. Without this check the
  // digit loop in `_parseASCIIDigits` never runs and a lone "-" or "+" would
  // be parsed as zero.
  guard _fastPath(codeUnits.count > 1) else { return nil }
  return _parseASCIIDigits(
    UnsafeBufferPointer(rebasing: codeUnits[1...]),
    radix: radix, isNegative: isNegative)
}
159+
160+
extension FixedWidthInteger {
  /// Creates a new integer value from the given string and radix.
  ///
  /// The string passed as `text` may begin with a plus or minus sign character
  /// (`+` or `-`), followed by one or more numeric digits (`0-9`) or letters
  /// (`a-z` or `A-Z`). Parsing of the string is case insensitive.
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  ///     let y = Int("-123", radix: 8)
  ///     // y == -83
  ///     let z = Int("+123", radix: 8)
  ///     // z == 83
  ///
  ///     let w = Int("07b", radix: 16)
  ///     // w == 123
  ///
  /// If `text` is in an invalid format or contains characters that are out of
  /// bounds for the given `radix`, or if the value it denotes in the given
  /// `radix` is not representable, the result is `nil`. For example, the
  /// following conversions result in `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("zzzzzzzzzzzzz", radix: 36)   // Out of range
  ///
  /// - Parameters:
  ///   - text: The ASCII representation of a number in the radix passed as
  ///     `radix`.
  ///   - radix: The radix, or base, to use for converting `text` to an integer
  ///     value. `radix` must be in the range `2...36`. The default is 10.
  @inlinable
  public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
    _precondition(2...36 ~= radix, "Radix not in range 2...36")
    // The parsing helpers require nonempty input, so reject "" up front.
    guard _fastPath(!text.isEmpty) else { return nil }
    // Prefer parsing directly from contiguous UTF-8 storage. The closure's
    // own optional result is wrapped in a second optional that is `nil` only
    // when no contiguous storage is available, so the generic fallback runs
    // only in that case and not when the fast path rejects the text.
    let result: Self? =
      text.utf8.withContiguousStorageIfAvailable {
        _parseASCII($0, radix: radix)
      } ?? _parseASCII(text.utf8, radix: radix)
    guard let result_ = result else { return nil }
    self = result_
  }

  /// Creates a new integer value from the given string.
  ///
  /// The string passed as `description` may begin with a plus or minus sign
  /// character (`+` or `-`), followed by one or more numeric digits (`0-9`).
  ///
  ///     let x = Int("123")
  ///     // x == 123
  ///
  /// If `description` is in an invalid format, or if the value it denotes in
  /// base 10 is not representable, the result is `nil`. For example, the
  /// following conversions result in `nil`:
  ///
  ///     Int(" 100")                       // Includes whitespace
  ///     Int("21-50")                      // Invalid format
  ///     Int("ff6600")                     // Characters out of bounds
  ///     Int("10000000000000000000000000") // Out of range
  ///
  /// - Parameter description: The ASCII representation of a number.
  @inlinable
  @inline(__always)
  public init?(_ description: String) {
    // Forwards to the radix-taking initializer with base 10.
    self.init(description, radix: 10)
  }
}
229+
230+
// -----------------------------------------------------------------------------
231+
// Old entry points preserved for ABI compatibility.
232+
// -----------------------------------------------------------------------------
233+
13234
/// Returns `c` as a UTF16.CodeUnit. Meant to be used as `_ascii16("x")`.
///
/// - Parameter c: A Unicode scalar; an internal invariant checks that its
///   value is in the ASCII range (at most 0x7F).
/// - Returns: The scalar's value converted to `UTF16.CodeUnit`.
14-
@inlinable
235+
@usableFromInline
15236
internal func _ascii16(_ c: Unicode.Scalar) -> UTF16.CodeUnit {
16237
_internalInvariant(c.value >= 0 && c.value <= 0x7F, "not ASCII")
17238
return UTF16.CodeUnit(c.value)
18239
}
19240

20-
@inlinable
21-
@inline(__always)
241+
@usableFromInline
22242
internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
23243
codeUnit u_: CodeUnit, radix: Result
24244
) -> Result? {
@@ -36,8 +256,7 @@ internal func _asciiDigit<CodeUnit: UnsignedInteger, Result: BinaryInteger>(
36256
return Result(truncatingIfNeeded: d)
37257
}
38258

39-
@inlinable
40-
@inline(__always)
259+
@usableFromInline
41260
internal func _parseUnsignedASCII<
42261
Rest: IteratorProtocol, Result: FixedWidthInteger
43262
>(
@@ -68,12 +287,11 @@ where Rest.Element: UnsignedInteger {
68287
}
69288

70289
//
71-
// TODO (TODO: JIRA): This needs to be completely rewritten. It's about 20KB of
72-
// always-inline code, most of which are MOV instructions.
290+
// Before it was superseded, this function was about 20KB of always-inline code,
291+
// most of which were MOV instructions.
73292
//
74293

75-
@inlinable
76-
@inline(__always)
294+
@usableFromInline
77295
internal func _parseASCII<
78296
CodeUnits: IteratorProtocol, Result: FixedWidthInteger
79297
>(
@@ -113,88 +331,4 @@ extension FixedWidthInteger {
113331
where CodeUnits.Element: UnsignedInteger {
114332
return _parseASCII(codeUnits: &codeUnits, radix: radix)
115333
}
116-
117-
/// Creates a new integer value from the given string and radix.
118-
///
119-
/// The string passed as `text` may begin with a plus or minus sign character
120-
/// (`+` or `-`), followed by one or more numeric digits (`0-9`) or letters
121-
/// (`a-z` or `A-Z`). Parsing of the string is case insensitive.
122-
///
123-
/// let x = Int("123")
124-
/// // x == 123
125-
///
126-
/// let y = Int("-123", radix: 8)
127-
/// // y == -83
128-
/// let y = Int("+123", radix: 8)
129-
/// // y == +83
130-
///
131-
/// let z = Int("07b", radix: 16)
132-
/// // z == 123
133-
///
134-
/// If `text` is in an invalid format or contains characters that are out of
135-
/// bounds for the given `radix`, or if the value it denotes in the given
136-
/// `radix` is not representable, the result is `nil`. For example, the
137-
/// following conversions result in `nil`:
138-
///
139-
/// Int(" 100") // Includes whitespace
140-
/// Int("21-50") // Invalid format
141-
/// Int("ff6600") // Characters out of bounds
142-
/// Int("zzzzzzzzzzzzz", radix: 36) // Out of range
143-
///
144-
/// - Parameters:
145-
/// - text: The ASCII representation of a number in the radix passed as
146-
/// `radix`.
147-
/// - radix: The radix, or base, to use for converting `text` to an integer
148-
/// value. `radix` must be in the range `2...36`. The default is 10.
149-
@inlinable // @specializable
150-
@_semantics("optimize.sil.specialize.generic.partial.never")
151-
public init?<S: StringProtocol>(_ text: S, radix: Int = 10) {
152-
_precondition(2...36 ~= radix, "Radix not in range 2...36")
153-
154-
if let str = text as? String, str._guts.isFastUTF8 {
155-
guard let ret = str._guts.withFastUTF8 ({ utf8 -> Self? in
156-
var iter = utf8.makeIterator()
157-
return _parseASCII(codeUnits: &iter, radix: Self(radix))
158-
}) else {
159-
return nil
160-
}
161-
self = ret
162-
return
163-
}
164-
165-
// TODO(String performance): We can provide fast paths for common radices,
166-
// native UTF-8 storage, etc.
167-
168-
var iter = text.utf8.makeIterator()
169-
guard let ret = Self._parseASCIISlowPath(
170-
codeUnits: &iter, radix: Self(radix)
171-
) else { return nil }
172-
173-
self = ret
174-
}
175-
176-
/// Creates a new integer value from the given string.
177-
///
178-
/// The string passed as `description` may begin with a plus or minus sign
179-
/// character (`+` or `-`), followed by one or more numeric digits (`0-9`).
180-
///
181-
/// let x = Int("123")
182-
/// // x == 123
183-
///
184-
/// If `description` is in an invalid format, or if the value it denotes in
185-
/// base 10 is not representable, the result is `nil`. For example, the
186-
/// following conversions result in `nil`:
187-
///
188-
/// Int(" 100") // Includes whitespace
189-
/// Int("21-50") // Invalid format
190-
/// Int("ff6600") // Characters out of bounds
191-
/// Int("10000000000000000000000000") // Out of range
192-
///
193-
/// - Parameter description: The ASCII representation of a number.
194-
@inlinable
195-
@_semantics("optimize.sil.specialize.generic.partial.never")
196-
@inline(__always)
197-
public init?(_ description: String) {
198-
self.init(description, radix: 10)
199-
}
200334
}

0 commit comments

Comments
 (0)