-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Ensure TDA.__init__ validates freq #24666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
e2a63c5
09c2a37
9097bf0
86c5658
3d9cd7d
eba0c51
ab0c928
37352bb
f273a2c
3e9eaa1
02acf9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,10 +15,9 @@ | |
from pandas.util._decorators import Appender | ||
|
||
from pandas.core.dtypes.common import ( | ||
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, | ||
is_integer_dtype, is_list_like, is_object_dtype, is_scalar, | ||
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, | ||
pandas_dtype) | ||
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, | ||
is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, | ||
is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) | ||
|
@@ -134,55 +133,39 @@ def dtype(self): | |
_attributes = ["freq"] | ||
|
||
def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): | ||
if isinstance(values, (ABCSeries, ABCIndexClass)): | ||
values = values._values | ||
|
||
if isinstance(values, type(self)): | ||
values, freq, freq_infer = extract_values_freq(values, freq) | ||
|
||
if not isinstance(values, np.ndarray): | ||
msg = ( | ||
if not hasattr(values, "dtype"): | ||
raise ValueError( | ||
"Unexpected type '{}'. 'values' must be a TimedeltaArray " | ||
"ndarray, or Series or Index containing one of those." | ||
) | ||
raise ValueError(msg.format(type(values).__name__)) | ||
|
||
if values.dtype == 'i8': | ||
# for compat with datetime/timedelta/period shared methods, | ||
# we can sometimes get here with int64 values. These represent | ||
# nanosecond UTC (or tz-naive) unix timestamps | ||
values = values.view(_TD_DTYPE) | ||
|
||
if values.dtype != _TD_DTYPE: | ||
raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) | ||
|
||
try: | ||
dtype_mismatch = dtype != _TD_DTYPE | ||
except TypeError: | ||
raise TypeError(_BAD_DTYPE.format(dtype=dtype)) | ||
else: | ||
if dtype_mismatch: | ||
raise TypeError(_BAD_DTYPE.format(dtype=dtype)) | ||
|
||
.format(type(values).__name__)) | ||
if freq == "infer": | ||
msg = ( | ||
raise ValueError( | ||
"Frequency inference not allowed in TimedeltaArray.__init__. " | ||
"Use 'pd.array()' instead." | ||
) | ||
raise ValueError(msg) | ||
"Use 'pd.array()' instead.") | ||
|
||
if copy: | ||
values = values.copy() | ||
if freq: | ||
freq = to_offset(freq) | ||
if dtype is not None and not is_dtype_equal(dtype, _TD_DTYPE): | ||
raise TypeError("dtype {dtype} cannot be converted to " | ||
"timedelta64[ns]".format(dtype=dtype)) | ||
|
||
if values.dtype == 'i8': | ||
values = values.view('timedelta64[ns]') | ||
|
||
self._data = values | ||
self._dtype = dtype | ||
self._freq = freq | ||
result = type(self)._from_sequence(values, dtype=dtype, | ||
copy=copy, freq=freq) | ||
self._data = result._data | ||
self._freq = result._freq | ||
self._dtype = result._dtype | ||
|
||
@classmethod | ||
def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): | ||
return cls(values, dtype=dtype, freq=freq) | ||
assert dtype is _TD_DTYPE, dtype | ||
assert isinstance(values, np.ndarray), type(values) | ||
|
||
result = object.__new__(cls) | ||
result._data = values.view(_TD_DTYPE) | ||
result._freq = to_offset(freq) | ||
result._dtype = _TD_DTYPE | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return result | ||
|
||
@classmethod | ||
def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, | ||
|
@@ -860,17 +843,17 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): | |
data = data._data | ||
|
||
# Convert whatever we have into timedelta64[ns] dtype | ||
if is_object_dtype(data) or is_string_dtype(data): | ||
if data.dtype.kind in ['S', 'O']: | ||
# no need to make a copy, need to convert if string-dtyped | ||
data = objects_to_td64ns(data, unit=unit, errors=errors) | ||
copy = False | ||
|
||
elif is_integer_dtype(data): | ||
elif data.dtype.kind == 'i': | ||
# treat as multiples of the given unit | ||
data, copy_made = ints_to_td64ns(data, unit=unit) | ||
copy = copy and not copy_made | ||
|
||
elif is_float_dtype(data): | ||
elif data.dtype.kind == 'f': | ||
# treat as multiples of the given unit. If after converting to nanos, | ||
# there are fractional components left, these are truncated | ||
# (i.e. NOT rounded) | ||
|
@@ -880,7 +863,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): | |
data[mask] = iNaT | ||
copy = False | ||
|
||
elif is_timedelta64_dtype(data): | ||
elif data.dtype.kind == 'm': | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do these actually make a difference? I am -1 on changing these; this is the reason we have the is_* routines. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The is_foo_dtype call takes about 16x longer than this check. Tom says these checks are the main source of his perf concern. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As an alternative, if fastpath:
is_float = is_integer = is_object = is_string = is_timedelta = is_datetime = False
else:
is_object = is_object_dtype(data)
is_...
if is_object:
...
elif is_...
if not fastpath:
data = np.array(data, copy=copy)
elif copy:
data = data.copy() There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Or use the even-faster _simple_new? fastpath is a pattern we're still trying to deprecate elsewhere. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am -1 on actually changing to the in [....] checks. The entire point is consistency in use. I am not convinced these are actual perf issues in the real world. Microseconds on a single construction pale in comparison to inconsistent code. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you can use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We're deprecating that from public methods. sequence_to_td64ns isn't public, is it? |
||
if data.dtype != _TD_DTYPE: | ||
# non-nano unit | ||
# TODO: watch out for overflows | ||
|
@@ -998,18 +981,3 @@ def _generate_regular_range(start, end, periods, offset): | |
|
||
data = np.arange(b, e, stride, dtype=np.int64) | ||
return data | ||
|
||
|
||
def extract_values_freq(arr, freq): | ||
# type: (TimedeltaArray, Offset) -> Tuple[ndarray, Offset, bool] | ||
freq_infer = False | ||
if freq is None: | ||
freq = arr.freq | ||
elif freq and arr.freq: | ||
freq = to_offset(freq) | ||
freq, freq_infer = dtl.validate_inferred_freq( | ||
freq, arr.freq, | ||
freq_infer=False | ||
) | ||
values = arr._data | ||
return values, freq, freq_infer |
Uh oh!
There was an error while loading. Please reload this page.