-
Notifications
You must be signed in to change notification settings - Fork 1.2k
[JSONSerialization] Use Int64 and UInt64 for integer parsing #1654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@swift-ci Please test |
@swift-ci please test |
defer { int64EndPointer.deallocate() } | ||
let int64Result = strtoll(startPointer, int64EndPointer, 10) | ||
let int64Distance = startPointer.distance(to: int64EndPointer[0]!) | ||
if int64Distance == doubleDistance { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe pointer distances are sufficient to tell whether the numeric values are equivalent. For instance, on my machine:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stddef.h>
double parseDouble(const char *string, ptrdiff_t *length)
{
char *end = NULL;
double result = strtod(string, &end);
if (length) *length = end - string;
return result;
}
int64_t parseInt64(const char *string, ptrdiff_t *length)
{
char *end = NULL;
int64_t result = strtoll(string, &end, 10);
if (length) *length = end - string;
return result;
}
uint64_t parseUInt64(const char *string, ptrdiff_t *length)
{
char *end = NULL;
uint64_t result = strtoull(string, &end, 10);
if (length) *length = end - string;
return result;
}
int main(void)
{
const char *strings[] = {
"123.4",
"123.456",
"123",
"56",
"-9223372036854775808", // INT64_MIN
"9223372036854775807", // INT64_MAX
"18446744073709551615", // UINT64_MAX
};
for (size_t i = 0; i < sizeof(strings)/sizeof(strings[0]); i += 1) {
ptrdiff_t d_length = 0;
double d_result = parseDouble(strings[i], &d_length);
ptrdiff_t i_length = 0;
int64_t i_result = parseInt64(strings[i], &i_length);
ptrdiff_t u_length = 0;
uint64_t u_result = parseUInt64(strings[i], &u_length);
printf("%f (%ld)\t|\t%lld (%ld)\t|\t%llu (%ld)\n", d_result, d_length, i_result, i_length, u_result, u_length);
}
}
produces
123.400000 (5) | 123 (3) | 123 (3)
123.456000 (7) | 123 (3) | 123 (3)
123.000000 (3) | 123 (3) | 123 (3)
56.000000 (2) | 56 (2) | 56 (2)
-9223372036854775808.000000 (20) | -9223372036854775808 (20) | 9223372036854775808 (20)
9223372036854775808.000000 (19) | 9223372036854775807 (19) | 9223372036854775807 (19)
18446744073709551616.000000 (20) | 9223372036854775807 (20) | 18446744073709551615 (20)
For UINT64_MAX
, for instance, strtod
, strtoll
, and strtoull
all read the same number of characters but produce very different results — only strtoull
returns the correct results. This might've worked when we just had Double
and Int
, but no longer; we'll likely need to take a more involved approach here:
- Attempt
strtod
. If the result is 0 and*doubleEndPointer
is notNUL
, bail out (failure to parse); if the result is ±HUGE_VAL
anderrno
is set toERANGE
(and wasn't before), it's too large to fit in aDouble
, in which case bail - Attempt
strtoll
. If the result is 0 and*int64EndPointer
is notNUL
, return theDouble
value if parsed correctly (otherwise bail, failure to parse). If the result isLLONG_MAX
orLLONG_MIN
anderrno
is set toERANGE
then bail ifLLONG_MIN
(since the value was negative and won't be parsed correctly bystrtoull
). Also, if the string started with a-
, return this int value as there's no point in parsing as unsigned - Attempt
strtoull
. If the result is 0 and*uint64EndPointer
is notNUL
, bail; if the result isULLONG_MAX
anderrno
is set toERANGE
, bail. - At this point, if
doubleDistance
is > thanint64Distance
anduint64Distance
, return theDouble
value; otherwise, theInt64
value if it fit in anInt64
; otherwise, theUInt64
value
I don't remember if Swift has any good facilities for working with errno
as it is allowed to be a macro IIRC but I can take a look. I'll sketch this out in C to clarify the intent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To illustrate, this is what I had in mind (rough code, no testing yet):
func parseTypedNumber(_ address: UnsafePointer<UInt8>, count: Int) -> (Any, IndexDistance)? {
let temp_buffer_size = 64
var temp_buffer = [Int8](repeating: 0, count: temp_buffer_size)
return temp_buffer.withUnsafeMutableBufferPointer { (buffer: inout UnsafeMutableBufferPointer<Int8>) -> (Any, IndexDistance)? in
memcpy(buffer.baseAddress!, address, min(count, temp_buffer_size - 1)) // Ensure null termination
func parseDouble(_ buffer: UnsafePointer<Int8>) -> (result: Double, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtod(buffer, &end)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && result.magnitude == .greatestFiniteMagnitude
return (result, buffer.distance(to: end!), overflow)
}
func parseInt64(_ buffer: UnsafePointer<Int8>) -> (result: Int64, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtoll(buffer, &end, 10)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && (result == .max || result == .min)
return (result, buffer.distance(to: end!), overflow)
}
func parseUInt64(_ buffer: UnsafePointer<Int8>) -> (result: UInt64, length: IndexDistance, overflow: Bool)? {
let originalErrno = errno
errno = 0
defer { errno = originalErrno }
var end: UnsafeMutablePointer<Int8>? = nil
let result = strtoull(buffer, &end, 10)
guard end![0] == 0 else {
// Parse failure since we require the buffer to be NUL-terminated.
return nil
}
let overflow = errno == ERANGE && result == .max
return (result, buffer.distance(to: end!), overflow)
}
// If we can't parse as a Double or if the value would overflow a Double, there's no point in parsing as an integer.
guard let parsedDouble = parseDouble(buffer.baseAddress!), !parsedDouble.overflow else {
return nil
}
guard let parsedInt64 = parseInt64(buffer.baseAddress!) else {
// If we couldn't parse as an Int64 but succeeded as a Double, we must have a Double.
return (NSNumber(value: parsedDouble.result), parsedDouble.length)
}
if parsedInt64.overflow {
// We have something that looks like an integer but didn't fit in an Int64.
// If it's not a negative number, it might be worth parsing as a UInt64.
guard buffer.baseAddress![0] != 0x2D /* minus */,
let parsedUInt64 = parseUInt64(buffer.baseAddress!),
!parsedUInt64.overflow else {
// Only representable as a Double:
// * The value was negative, or
// * It failed to parse as a UInt64, or
// * It was too large even for a UInt64.
return (NSNumber(value: parsedDouble.result), parsedDouble.length)
}
return (NSNumber(value: parsedUInt64.result), parsedUInt64.length)
}
// We succeeded in parsing in an Int64 without overflow. Good enough.
return (NSNumber(value: parsedInt64.result), parsedInt64.length)
}
}
/cc @spevans
We can also introduce Decimal
into the above to better represent Double
values to preserve as much precision as possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the above code:
let strings = [
"123.4",
"123.456",
"123",
"56",
"-9223372036854775808", // INT64_MIN
"9223372036854775807", // INT64_MAX
"18446744073709551615", // UINT64_MAX
]
for string in strings {
string.withCString {
let length = strlen($0)
guard let (result, resultLength) = parseTypedNumber($0, count: length) else {
print("Failed to parse \(string)")
return
}
print("\(result) (\(Unicode.Scalar(UInt8((result as! NSNumber).objCType[0]))), \(resultLength))")
}
}
yields
123.4 (d, 5)
123.456 (d, 7)
123 (q, 3)
56 (q, 2)
-9223372036854775808 (q, 20)
9223372036854775807 (q, 19)
18446744073709551615 (Q, 20)
So this might get us a bit closer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, yes, this is quite a bit more involved than I'd thought. In any case, though, to match Darwin behavior fully, we would ultimately need something like #1655. This was meant to be an incremental improvement in the meantime that, given my responsibility for #1634, I could feasibly contribute in some spare time. However, @spevans is much further along here and I'll go ahead and close this.
A follow-up to #1634 to expand the range of integers parsed to align behavior more closely with Darwin, as suggested by @itaiferber.
(There will still be residual differences in parsing between macOS and Linux owing to use of
Decimal
in the former.)