Skip to content

REF: de-duplicate ndarray[datetimelike] wrapping #38129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 29, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.construction import array, extract_array
from pandas.core.construction import array, extract_array, wrap_if_datetimelike
from pandas.core.indexers import check_array_indexer
from pandas.core.indexes.base import ensure_index
from pandas.core.ops import invalid_comparison, unpack_zerodim_and_defer
Expand Down Expand Up @@ -251,11 +251,9 @@ def _simple_new(
raise ValueError(msg)

# For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

left = maybe_upcast_datetimelike_array(left)
left = wrap_if_datetimelike(left)
left = extract_array(left, extract_numpy=True)
right = maybe_upcast_datetimelike_array(right)
right = wrap_if_datetimelike(right)
right = extract_array(right, extract_numpy=True)

lbase = getattr(left, "_ndarray", left).base
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,24 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayL
return obj


def wrap_if_datetimelike(arr):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree with joris that maybe_* is pretty much a convention and would go with that nomenclature.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe_do_x usually means "try to do x and fall back on failure". I think wrap_if_datetimelike is more precise on this point.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok how about
ensure_datetimelike_index then

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type the arg (and return dtype)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really, it's unrestricted

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aren't these always TDI/DTI on the return type? (sure input is just ArrayLike)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, we call it on arbitrary inputs in ops.array_ops

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, then I am misunderstanding how this is used, and it's definitely maybe_wrap_if_datetimelike. It's either ensure or maybe; there really shouldn't be any other choices.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

renamed + greenish

"""
Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
"""
if isinstance(arr, np.ndarray):
if arr.dtype.kind == "M":
from pandas.core.arrays import DatetimeArray

return DatetimeArray._from_sequence(arr)

elif arr.dtype.kind == "m":
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._from_sequence(arr)

return arr


def sanitize_array(
data,
index: Optional[Index],
Expand Down
21 changes: 4 additions & 17 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.sparse import SparseArray
from pandas.core.construction import array
from pandas.core.construction import array, wrap_if_datetimelike


def _get_dtype_kinds(arrays) -> Set[str]:
Expand Down Expand Up @@ -364,12 +364,13 @@ def _concat_datetime(to_concat, axis=0, typs=None):
if typs is None:
typs = _get_dtype_kinds(to_concat)

to_concat = [_wrap_datetimelike(x) for x in to_concat]
to_concat = [wrap_if_datetimelike(x) for x in to_concat]
single_dtype = len({x.dtype for x in to_concat}) == 1

# multiple types, need to coerce to object
if not single_dtype:
# wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
# wrap_if_datetimelike ensures that astype(object) wraps
# in Timestamp/Timedelta
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)

if axis == 1:
Expand All @@ -383,17 +384,3 @@ def _concat_datetime(to_concat, axis=0, typs=None):
assert result.shape[0] == 1
result = result[0]
return result


def _wrap_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.

    The input is first passed through ``extract_array(..., extract_numpy=True)``;
    if the result is an ndarray whose dtype kind is "m" or "M" it is handed to
    ``pd_array``, otherwise the extracted value is returned as-is.

    DTA/TDA handle .astype(object) correctly.
    """
    # Imported inside the function, as in the original block.
    from pandas.core.construction import array as pd_array, extract_array

    values = extract_array(arr, extract_numpy=True)
    needs_wrapping = isinstance(values, np.ndarray) and values.dtype.kind in [
        "m",
        "M",
    ]
    return pd_array(values) if needs_wrapping else values
34 changes: 5 additions & 29 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna, notna

from pandas.core.construction import wrap_if_datetimelike
from pandas.core.ops import missing
from pandas.core.ops.dispatch import should_extension_dispatch
from pandas.core.ops.invalid import invalid_comparison
Expand Down Expand Up @@ -175,8 +176,8 @@ def arithmetic_op(left: ArrayLike, right: Any, op):

# NB: We assume that extract_array has already been called
# on `left` and `right`.
lvalues = maybe_upcast_datetimelike_array(left)
rvalues = maybe_upcast_datetimelike_array(right)
lvalues = wrap_if_datetimelike(left)
rvalues = wrap_if_datetimelike(right)
rvalues = _maybe_upcast_for_op(rvalues, lvalues.shape)

if should_extension_dispatch(lvalues, rvalues) or isinstance(rvalues, Timedelta):
Expand Down Expand Up @@ -206,7 +207,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike:
ndarray or ExtensionArray
"""
# NB: We assume extract_array has already been called on left and right
lvalues = maybe_upcast_datetimelike_array(left)
lvalues = wrap_if_datetimelike(left)
rvalues = right

rvalues = lib.item_from_zerodim(rvalues)
Expand Down Expand Up @@ -331,7 +332,7 @@ def fill_bool(x, left=None):
right = construct_1d_object_array_from_listlike(right)

# NB: We assume extract_array has already been called on left and right
lvalues = maybe_upcast_datetimelike_array(left)
lvalues = wrap_if_datetimelike(left)
rvalues = right

if should_extension_dispatch(lvalues, rvalues):
Expand Down Expand Up @@ -400,31 +401,6 @@ def get_array_op(op):
raise NotImplementedError(op_name)


def maybe_upcast_datetimelike_array(obj: ArrayLike) -> ArrayLike:
    """
    Wrap a datetime64 or timedelta64 ndarray in the matching ExtensionArray.

    Parameters
    ----------
    obj : ndarray or ExtensionArray

    Returns
    -------
    ndarray or ExtensionArray
        A TimedeltaArray for an ndarray of dtype kind "m", a DatetimeArray
        for an ndarray of dtype kind "M"; any other input is returned
        unchanged.
    """
    # Anything that is not a plain ndarray passes straight through.
    if not isinstance(obj, np.ndarray):
        return obj

    kind = obj.dtype.kind
    if kind == "m":
        # Imported inside the function, as in the original block.
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(obj)
    if kind == "M":
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(obj)

    return obj


def _maybe_upcast_for_op(obj, shape: Shape):
"""
Cast non-pandas objects to pandas types to unify behavior of arithmetic
Expand Down