Skip to content

BUG: underflow on Timestamp creation #14433

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Bug Fixes
- ``pd.merge()`` will raise ``ValueError`` when non-boolean values are passed to boolean-type arguments (:issue:`14434`)


- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`)

- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`)
Expand Down
9 changes: 2 additions & 7 deletions pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,8 @@ cdef int64_t NPY_NAT = util.get_nat()
ctypedef unsigned char UChar

cimport util
from util cimport is_array, _checknull, _checknan

cdef extern from "headers/stdint.h":
enum: UINT8_MAX
enum: INT64_MAX
enum: INT64_MIN

from util cimport (is_array, _checknull, _checknan, INT64_MAX,
INT64_MIN, UINT8_MAX)

cdef extern from "math.h":
double sqrt(double x)
Expand Down
21 changes: 14 additions & 7 deletions pandas/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt;
Expand All @@ -860,7 +861,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / 60;
Expand All @@ -875,7 +877,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / (60*60);
Expand All @@ -891,7 +894,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / (60*60*1000LL);
Expand All @@ -908,7 +912,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / (60*60*1000000LL);
Expand All @@ -925,7 +930,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / (60*60*1000000000LL);
Expand All @@ -943,7 +949,8 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
dt = dt % perday;
}
else {
set_datetimestruct_days((dt - (perday-1)) / perday, out);
set_datetimestruct_days(dt / perday - (dt % perday == 0 ? 0 : 1),
out);
dt = (perday-1) + (dt + 1) % perday;
}
out->hour = dt / (60*60*1000000000000LL);
Expand Down
16 changes: 3 additions & 13 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,9 @@ iNaT = util.get_nat()

cdef bint PY2 = sys.version_info[0] == 2

cdef extern from "headers/stdint.h":
enum: UINT8_MAX
enum: UINT16_MAX
enum: UINT32_MAX
enum: UINT64_MAX
enum: INT8_MIN
enum: INT8_MAX
enum: INT16_MIN
enum: INT16_MAX
enum: INT32_MAX
enum: INT32_MIN
enum: INT64_MAX
enum: INT64_MIN
from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
INT8_MIN, INT8_MAX, INT16_MIN, INT16_MAX,
INT32_MAX, INT32_MIN, INT64_MAX, INT64_MIN)

# core.common import for fast inference checks

Expand Down
14 changes: 14 additions & 0 deletions pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ ctypedef fused numeric:
cnp.float32_t
cnp.float64_t

# Fixed-width integer limits from stdint.h, declared once here so that other
# modules can bring them into scope with ``from util cimport ...``.
cdef extern from "headers/stdint.h":
enum: UINT8_MAX
enum: UINT16_MAX
enum: UINT32_MAX
enum: UINT64_MAX
enum: INT8_MIN
enum: INT8_MAX
enum: INT16_MIN
enum: INT16_MAX
enum: INT32_MAX
enum: INT32_MIN
enum: INT64_MAX
enum: INT64_MIN

cdef inline object get_value_at(ndarray arr, object loc):
cdef:
Py_ssize_t i, sz
Expand Down
9 changes: 9 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4463,6 +4463,15 @@ def test_basics_nanos(self):
self.assertEqual(stamp.microsecond, 0)
self.assertEqual(stamp.nanosecond, 500)

# GH 14415
val = np.iinfo(np.int64).min + 80000000000000
stamp = Timestamp(val)
self.assertEqual(stamp.year, 1677)
self.assertEqual(stamp.month, 9)
self.assertEqual(stamp.day, 21)
self.assertEqual(stamp.microsecond, 145224)
self.assertEqual(stamp.nanosecond, 192)

def test_unit(self):

def check(val, unit=None, h=1, s=1, us=0):
Expand Down
13 changes: 8 additions & 5 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ from cpython cimport (
PyUnicode_AsUTF8String,
)


# Cython < 0.17 doesn't have this in cpython
cdef extern from "Python.h":
cdef PyTypeObject *Py_TYPE(object)
Expand All @@ -37,7 +38,7 @@ from datetime cimport cmp_pandas_datetimestruct
from libc.stdlib cimport free

from util cimport (is_integer_object, is_float_object, is_datetime64_object,
is_timedelta64_object)
is_timedelta64_object, INT64_MAX)
cimport util

from datetime cimport *
Expand Down Expand Up @@ -904,10 +905,12 @@ cpdef object get_value_box(ndarray arr, object loc):


# Add the min and max fields at the class level
# These are defined as magic numbers due to strange
# wraparound behavior when using the true int64 lower boundary
cdef int64_t _NS_LOWER_BOUND = -9223285636854775000LL
cdef int64_t _NS_UPPER_BOUND = 9223372036854775807LL
cdef int64_t _NS_UPPER_BOUND = INT64_MAX
# The smallest value we could actually represent is
# INT64_MIN + 1 == -9223372036854775807,
# but to allow overflow-free conversion at microsecond resolution we
# use the smallest value whose nanosecond part is zero (last 3 digits are 0).
cdef int64_t _NS_LOWER_BOUND = -9223372036854775000

cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS)
Expand Down