Skip to content

BUG: datetime64 series reduces to nan when empty instead of nat #11245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 11, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Other Enhancements
API changes
~~~~~~~~~~~

- min and max reductions on ``datetime64`` and ``timedelta64`` dtyped series now
result in ``NaT`` and not ``nan`` (:issue:`11245`).

.. _whatsnew_0171.deprecations:

Deprecations
Expand Down Expand Up @@ -74,3 +77,5 @@ Bug Fixes


- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`)
- Fixed a bug that prevented the construction of an empty series of dtype
``datetime64[ns, tz]`` (:issue:`11245`).
3 changes: 1 addition & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1927,12 +1927,11 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
value = tslib.iNaT

# we have an array of datetime or timedeltas & nulls
elif np.prod(value.shape) and not is_dtype_equal(value.dtype, dtype):
elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype):
try:
if is_datetime64:
value = to_datetime(value, errors=errors)._values
elif is_datetime64tz:

# input has to be UTC at this point, so just localize
value = to_datetime(value, errors=errors).tz_localize(dtype.tz)
elif is_timedelta64:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ class DatetimeTZDtype(ExtensionDtype):
num = 101
base = np.dtype('M8[ns]')
_metadata = ['unit','tz']
_match = re.compile("datetime64\[(?P<unit>.+), (?P<tz>.+)\]")
_match = re.compile("(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")

def __init__(self, unit, tz=None):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4624,7 +4624,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
values = self.values
result = f(values)

if is_object_dtype(result.dtype):
if hasattr(result, 'dtype') and is_object_dtype(result.dtype):
try:
if filter_type is None or filter_type == 'numeric':
result = result.astype(np.float64)
Expand Down
67 changes: 18 additions & 49 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,65 +425,34 @@ def nansem(values, axis=None, skipna=True, ddof=1):
return np.sqrt(var) / np.sqrt(count)


@bottleneck_switch()
def nanmin(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='+inf')

# numpy 1.6.1 workaround in Python 3.x
if is_object_dtype(values) and compat.PY3:
if values.ndim > 1:
apply_ax = axis if axis is not None else 0
result = np.apply_along_axis(builtins.min, apply_ax, values)
else:
try:
result = builtins.min(values)
except:
result = np.nan
else:
def _nanminmax(meth, fill_value_typ):
@bottleneck_switch()
def reduction(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(
values,
skipna,
fill_value_typ=fill_value_typ,
)

if ((axis is not None and values.shape[axis] == 0)
or values.size == 0):
try:
result = ensure_float(values.sum(axis, dtype=dtype_max))
result = getattr(values, meth)(axis, dtype=dtype_max)
result.fill(np.nan)
except:
result = np.nan
else:
result = values.min(axis)
result = getattr(values, meth)(axis)

result = _wrap_results(result, dtype)
return _maybe_null_out(result, axis, mask)
result = _wrap_results(result, dtype)
return _maybe_null_out(result, axis, mask)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have already wrapped the types by the time we call `_maybe_null_out`. The result will already be coerced, so I think the `is` check is safe.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, then your check is good. Thanks.


reduction.__name__ = 'nan' + meth
return reduction

@bottleneck_switch()
def nanmax(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='-inf')

# numpy 1.6.1 workaround in Python 3.x
if is_object_dtype(values) and compat.PY3:

if values.ndim > 1:
apply_ax = axis if axis is not None else 0
result = np.apply_along_axis(builtins.max, apply_ax, values)
else:
try:
result = builtins.max(values)
except:
result = np.nan
else:
if ((axis is not None and values.shape[axis] == 0)
or values.size == 0):
try:
result = ensure_float(values.sum(axis, dtype=dtype_max))
result.fill(np.nan)
except:
result = np.nan
else:
result = values.max(axis)

result = _wrap_results(result, dtype)
return _maybe_null_out(result, axis, mask)
nanmin = _nanminmax('min', fill_value_typ='+inf')
nanmax = _nanminmax('max', fill_value_typ='-inf')


def nanargmax(values, axis=None, skipna=True):
Expand Down Expand Up @@ -637,7 +606,7 @@ def _maybe_null_out(result, axis, mask):
else:
result = result.astype('f8')
result[null_mask] = np.nan
else:
elif result is not tslib.NaT:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we started with M8/m8 and then did a `.view('i8')`, this needs to be compared to `pd.lib.iNaT`; I'm not sure why you are not hitting this here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this should be elif not (result is tslib.NaT or result is tslib.iNaT)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare with `== tslib.iNaT`.

But I'm puzzled why a NaT is there, as I don't think it's wrapped yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you take a ts and take a view as an int and then reduce, I think you still want nan. The only reason that you would want a NaT is that the dtype of the sequence being reduced is datetime64.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see what you are saying, you were suggesting:

elif result.view('i8') == tslib.iNaT

It looks like result is still just a timestamp at this point so it will be the NaT object. I don't know if this is a guarantee or not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I mean `result` should already be an int if it's M8, as wrapping is the last step.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok then

null_mask = mask.size - mask.sum()
if null_mask == 0:
result = np.nan
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from itertools import product

import nose
import numpy as np
Expand Down Expand Up @@ -148,6 +149,15 @@ def test_dst(self):
self.assertTrue(is_datetimetz(s2))
self.assertEqual(s1.dtype, s2.dtype)

def test_parser(self):
# pr #11245
for tz, constructor in product(('UTC', 'US/Eastern'),
('M8', 'datetime64')):
self.assertEqual(
DatetimeTZDtype('%s[ns, %s]' % (constructor, tz)),
DatetimeTZDtype('ns', tz),
)




Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -7960,6 +7960,12 @@ def test_datetime_timedelta_quantiles(self):
self.assertTrue(pd.isnull(Series([],dtype='M8[ns]').quantile(.5)))
self.assertTrue(pd.isnull(Series([],dtype='m8[ns]').quantile(.5)))

def test_empty_timeseries_redections_return_nat(self):
# covers #11245
for dtype in ('m8[ns]', 'm8[ns]', 'M8[ns]', 'M8[ns, UTC]'):
self.assertIs(Series([], dtype=dtype).min(), pd.NaT)
self.assertIs(Series([], dtype=dtype).max(), pd.NaT)

if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)