Skip to content

BUG: resolution inference with NaT ints/floats/strings #55981

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2633,6 +2633,7 @@ def maybe_convert_objects(ndarray[object] objects,
tsobj = convert_to_tsobject(val, None, None, 0, 0)
tsobj.ensure_reso(NPY_FR_ns)
except OutOfBoundsDatetime:
+ # e.g. test_out_of_s_bounds_datetime64
seen.object_ = True
break
else:
Expand Down
34 changes: 19 additions & 15 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,12 @@ cpdef array_to_datetime(
Returns
-------
np.ndarray
- May be datetime64[ns] or object dtype
+ May be datetime64[creso_unit] or object dtype
tzinfo or None
"""
cdef:
Py_ssize_t i, n = values.size
- object val, tz
+ object val
ndarray[int64_t] iresult
npy_datetimestruct dts
bint utc_convert = bool(utc)
Expand All @@ -468,7 +468,7 @@ cpdef array_to_datetime(
_TSObject _ts
float tz_offset
set out_tzoffset_vals = set()
- tzinfo tz_out = None
+ tzinfo tz, tz_out = None
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT item_reso
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
Expand Down Expand Up @@ -523,15 +523,14 @@ cpdef array_to_datetime(

elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
- item_reso = NPY_FR_ns
- state.update_creso(item_reso)
- if infer_reso:
- creso = state.creso

if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we no longer need to update the creso state here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

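We delay updating the creso state until after the `if val != val or val == NPY_NAT` check, so that NaN/iNaT ints and floats are treated symmetrically with other NaTs, i.e. they are ignored for resolution inference.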

else:
# we now need to parse this as if unit='ns'
+ item_reso = NPY_FR_ns
+ state.update_creso(item_reso)
+ if infer_reso:
+ creso = state.creso
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
state.found_other = True

Expand All @@ -553,6 +552,16 @@ cpdef array_to_datetime(
_ts = convert_str_to_tsobject(
val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst
)

+ if _ts.value == NPY_NAT:
+ # e.g. "NaT" string or empty string, we do not consider
+ # this as either tzaware or tznaive. See
+ # test_to_datetime_with_empty_str_utc_false_format_mixed
+ # We also do not update resolution inference based on this,
+ # see test_infer_with_nat_int_float_str
+ iresult[i] = _ts.value
+ continue

item_reso = _ts.creso
state.update_creso(item_reso)
if infer_reso:
Expand All @@ -563,12 +572,7 @@ cpdef array_to_datetime(
iresult[i] = _ts.value

tz = _ts.tzinfo
- if _ts.value == NPY_NAT:
- # e.g. "NaT" string or empty string, we do not consider
- # this as either tzaware or tznaive. See
- # test_to_datetime_with_empty_str_utc_false_format_mixed
- pass
- elif tz is not None:
+ if tz is not None:
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
nsecs = tz.utcoffset(None).total_seconds()
Expand Down Expand Up @@ -641,7 +645,7 @@ cpdef array_to_datetime(
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
- result = iresult.view(f"M8[{abbrev}]")
+ result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
return result, tz_out


Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ cdef _get_format_regex(str fmt):


cdef class DatetimeParseState:
- def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
+ def __cinit__(self, NPY_DATETIMEUNIT creso):
# found_tz and found_naive are specifically about datetime/Timestamp
# objects with and without tzinfos attached.
self.found_tz = False
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,24 @@ def test_infer_heterogeneous(self):
assert tz is None
tm.assert_numpy_array_equal(result, expected[::-1])

+ @pytest.mark.parametrize(
+ "item", [float("nan"), NaT.value, float(NaT.value), "NaT", ""]
+ )
+ def test_infer_with_nat_int_float_str(self, item):
+ # floats/ints get inferred to nanos *unless* they are NaN/iNaT;
+ # similarly, a NaT string gets treated like a NaT scalar (ignored for resolution)
+ dt = datetime(2023, 11, 15, 15, 5, 6)
+
+ arr = np.array([dt, item], dtype=object)
+ result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
+ assert tz is None
+ expected = np.array([dt, np.datetime64("NaT")], dtype="M8[us]")
+ tm.assert_numpy_array_equal(result, expected)
+
+ result2, tz2 = tslib.array_to_datetime(arr[::-1], creso=creso_infer)
+ assert tz2 is None
+ tm.assert_numpy_array_equal(result2, expected[::-1])


class TestArrayToDatetimeWithTZResolutionInference:
def test_array_to_datetime_with_tz_resolution(self):
Expand Down