-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
EA: revert treatment of i8values #24623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
a562769
21d7228
891663b
401abd8
a46a2cc
27e5fb7
1715eba
8e6ca59
c109018
14cf136
20b91dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -236,83 +236,51 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin, | |
_dtype = None # type: Union[np.dtype, DatetimeTZDtype] | ||
_freq = None | ||
|
||
def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): | ||
if isinstance(values, (ABCSeries, ABCIndexClass)): | ||
values = values._values | ||
|
||
if isinstance(values, type(self)): | ||
# validation | ||
dtz = getattr(dtype, 'tz', None) | ||
if dtz and values.tz is None: | ||
dtype = DatetimeTZDtype(tz=dtype.tz) | ||
elif dtz and values.tz: | ||
if not timezones.tz_compare(dtz, values.tz): | ||
msg = ( | ||
"Timezone of the array and 'dtype' do not match. " | ||
"'{}' != '{}'" | ||
) | ||
raise TypeError(msg.format(dtz, values.tz)) | ||
elif values.tz: | ||
dtype = values.dtype | ||
# freq = validate_values_freq(values, freq) | ||
if freq is None: | ||
freq = values.freq | ||
values = values._data | ||
def __init__(self, values, dtype=None, freq=None, copy=False): | ||
if freq == "infer": | ||
raise ValueError( | ||
"Frequency inference not allowed in DatetimeArray.__init__. " | ||
"Use 'pd.array()' instead.") | ||
|
||
if not isinstance(values, np.ndarray): | ||
msg = ( | ||
"Unexpected type '{}'. 'values' must be a DatetimeArray " | ||
if not hasattr(values, "dtype"): | ||
# e.g. list | ||
raise ValueError( | ||
"Unexpected type '{vals}'. 'values' must be a DatetimeArray " | ||
"ndarray, or Series or Index containing one of those." | ||
) | ||
raise ValueError(msg.format(type(values).__name__)) | ||
.format(vals=type(values).__name__)) | ||
|
||
if values.dtype == 'i8': | ||
# for compat with datetime/timedelta/period shared methods, | ||
# we can sometimes get here with int64 values. These represent | ||
# nanosecond UTC (or tz-naive) unix timestamps | ||
values = values.view(_NS_DTYPE) | ||
|
||
if values.dtype != _NS_DTYPE: | ||
msg = ( | ||
if values.dtype == np.bool_: | ||
raise ValueError( | ||
"The dtype of 'values' is incorrect. Must be 'datetime64[ns]'." | ||
" Got {} instead." | ||
) | ||
raise ValueError(msg.format(values.dtype)) | ||
" Got {dtype} instead." .format(dtype=values.dtype)) | ||
|
||
dtype = _validate_dt64_dtype(dtype) | ||
|
||
if freq == "infer": | ||
msg = ( | ||
"Frequency inference not allowed in DatetimeArray.__init__. " | ||
"Use 'pd.array()' instead." | ||
) | ||
raise ValueError(msg) | ||
|
||
if copy: | ||
values = values.copy() | ||
if freq: | ||
freq = to_offset(freq) | ||
if getattr(dtype, 'tz', None): | ||
# https://github.com/pandas-dev/pandas/issues/18595 | ||
# Ensure that we have a standard timezone for pytz objects. | ||
# Without this, things like adding an array of timedeltas and | ||
# a tz-aware Timestamp (with a tz specific to its datetime) will | ||
# be incorrect(ish?) for the array as a whole | ||
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) | ||
|
||
self._data = values | ||
self._dtype = dtype | ||
self._freq = freq | ||
arr = type(self)._from_sequence(values, dtype=dtype, | ||
freq=freq, copy=copy) | ||
self._data = arr._data | ||
self._freq = arr._freq | ||
self._dtype = arr._dtype | ||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, tz=None): | ||
def _simple_new(cls, values, freq=None, tz=None, dtype=None): | ||
""" | ||
we require the we have a dtype compat for the values | ||
if we are passed a non-dtype compat, then coerce using the constructor | ||
""" | ||
dtype = DatetimeTZDtype(tz=tz) if tz else _NS_DTYPE | ||
|
||
return cls(values, freq=freq, dtype=dtype) | ||
if tz is not None: | ||
# TODO: get tz out of here altogether | ||
assert dtype is None | ||
tz = timezones.tz_standardize(tz) | ||
dtype = DatetimeTZDtype(tz=tz) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this tz normalization needs to be applied to the case of user-provided There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, this (roughly) passes on master, fails here def test_foo():
idx = pd.DatetimeIndex(['2000', '2001'], tz='US/Central')
t1 = pd.DatetimeTZDtype(tz=idx.tz)
t2 = pd.DatetimeTZDtype(tz=idx[0].tz)
x = pd.arrays.DatetimeArray._simple_new(idx.view("i8"), dtype=t1)
y = pd.arrays.DatetimeArray._simple_new(idx.view("i8"), dtype=t2)
assert x.dtype.tz == y.dtype.tz I think it should pass. |
||
elif dtype is None: | ||
dtype = _NS_DTYPE | ||
|
||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
result = object.__new__(cls) | ||
result._data = values.view('datetime64[ns]') | ||
result._freq = freq | ||
result._dtype = dtype | ||
return result | ||
|
||
@classmethod | ||
def _from_sequence(cls, data, dtype=None, copy=False, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -340,7 +340,7 @@ def test_from_array_keeps_base(self): | |
arr = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]') | ||
dta = DatetimeArray(arr) | ||
|
||
assert dta._data is arr | ||
assert dta._data.base is arr | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because a view of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then why was it passing on master? Because you changed it to take a view? Is it important to do that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because I changed it back to use _from_sequence, which uses sequence_to_dt64ns, which yes takes a view. And it is important in as much as in the PR implementing sequence_to_dt64ns when the original version only took a view if it was necessary, there was a request to do it unconditionally to save a line |
||
dta = DatetimeArray(arr[:0]) | ||
assert dta._data.base is arr | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do u only check for book here? shouldn’t this. e inside _from_sequeneve?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because up until moments ago I thought we had a test checking that
pd.to_datetime(False)
returnedNaT
. No idea where I got that idea... Just moved this check down.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
well, the good news is that I'm not hallucinating non-existent test cases. the relevant test was for
to_datetime(False, errors="coerce")
, so I'm moving this check back up to__init__
. To be clear, I don't think the three checks in__init__
should be there at all, but for the moment I'm trying to minimize behavior changes and just get the tests passing.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not because
to_datetime
has that behaviour that there should be a check for that here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's because
to_datetime
has acoerce
kwarg that specified fallback behavior. If we want to disallow bools (and other dtypes presumably, bools were just the only one tested in 24024) in DatetimeArray but not in sequence_to_dt64ns, the check needs to be here.