-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
REF: Index.__new__ #38665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: Index.__new__ #38665
Changes from 3 commits
0542c4b
c897c60
7e5b6b9
159a65b
86c28e6
cb89ead
8dc0330
6edc9a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,6 +66,12 @@ | |
validate_all_hashable, | ||
) | ||
from pandas.core.dtypes.concat import concat_compat | ||
from pandas.core.dtypes.dtypes import ( | ||
CategoricalDtype, | ||
DatetimeTZDtype, | ||
IntervalDtype, | ||
PeriodDtype, | ||
) | ||
from pandas.core.dtypes.generic import ( | ||
ABCDatetimeIndex, | ||
ABCMultiIndex, | ||
|
@@ -331,12 +337,6 @@ def __new__( | |
|
||
# index-like | ||
elif isinstance(data, (np.ndarray, Index, ABCSeries)): | ||
# Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 | ||
from pandas.core.indexes.numeric import ( | ||
Float64Index, | ||
Int64Index, | ||
UInt64Index, | ||
) | ||
|
||
if dtype is not None: | ||
# we need to avoid having numpy coerce | ||
|
@@ -347,42 +347,31 @@ def __new__( | |
data = _maybe_cast_with_dtype(data, dtype, copy) | ||
dtype = data.dtype # TODO: maybe not for object? | ||
|
||
# maybe coerce to a sub-class | ||
if is_signed_integer_dtype(data.dtype): | ||
return Int64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif is_unsigned_integer_dtype(data.dtype): | ||
return UInt64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif is_float_dtype(data.dtype): | ||
return Float64Index(data, copy=copy, dtype=dtype, name=name) | ||
elif issubclass(data.dtype.type, bool) or is_bool_dtype(data): | ||
subarr = data.astype("object") | ||
if data.dtype.kind in ["i", "u", "f"]: | ||
# maybe coerce to a sub-class | ||
arr = data | ||
else: | ||
subarr = com.asarray_tuplesafe(data, dtype=object) | ||
|
||
# asarray_tuplesafe does not always copy underlying data, | ||
# so need to make sure that this happens | ||
if copy: | ||
subarr = subarr.copy() | ||
arr = com.asarray_tuplesafe(data, dtype=object) | ||
|
||
if dtype is None: | ||
new_data, new_dtype = _maybe_cast_data_without_dtype(subarr) | ||
if new_dtype is not None: | ||
if dtype is None: | ||
new_data = _maybe_cast_data_without_dtype(arr) | ||
new_dtype = new_data.dtype | ||
return cls( | ||
new_data, dtype=new_dtype, copy=False, name=name, **kwargs | ||
new_data, dtype=new_dtype, copy=copy, name=name, **kwargs | ||
) | ||
|
||
klass = cls._dtype_to_subclass(arr.dtype) | ||
arr = klass._ensure_array(arr, dtype, copy) | ||
if kwargs: | ||
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") | ||
if subarr.ndim > 1: | ||
# GH#13601, GH#20285, GH#27125 | ||
raise ValueError("Index data must be 1-dimensional") | ||
return cls._simple_new(subarr, name) | ||
return klass._simple_new(arr, name) | ||
|
||
elif data is None or is_scalar(data): | ||
elif is_scalar(data): | ||
raise cls._scalar_data_error(data) | ||
elif hasattr(data, "__array__"): | ||
return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) | ||
else: | ||
|
||
if tupleize_cols and is_list_like(data): | ||
# GH21470: convert iterable to list before determining if empty | ||
if is_iterator(data): | ||
|
@@ -400,6 +389,64 @@ def __new__( | |
subarr = com.asarray_tuplesafe(data, dtype=object) | ||
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) | ||
|
||
@classmethod
def _ensure_array(cls, data, dtype, copy: bool):
    """
    Validate ``data`` and hand back an array suitable for ``_simple_new``.

    Raises ValueError when ``data`` has more than one dimension. When
    ``copy`` is true the result is ``data.copy()``; otherwise ``data``
    itself is returned. ``dtype`` is accepted but not used here.
    """
    # GH#13601, GH#20285, GH#27125
    if data.ndim > 1:
        raise ValueError("Index data must be 1-dimensional")

    # asarray_tuplesafe does not always copy the underlying data, so an
    # explicit copy is made whenever the caller asked for one.
    return data.copy() if copy else data
|
||
@classmethod
def _dtype_to_subclass(cls, dtype: DtypeObj):
    """
    Return the Index subclass that corresponds to ``dtype``.

    The checks run in a fixed order: datetime, timedelta, categorical,
    interval, period, float, unsigned integer, signed integer, object.
    Raises NotImplementedError(dtype) when none of them matches.
    """
    # Every subclass is imported lazily, inside the branch that needs it.
    # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
    if isinstance(dtype, DatetimeTZDtype) or dtype == np.dtype("M8[ns]"):
        from pandas import DatetimeIndex as klass
    elif dtype == "m8[ns]":
        from pandas import TimedeltaIndex as klass
    elif isinstance(dtype, CategoricalDtype):
        from pandas import CategoricalIndex as klass
    elif isinstance(dtype, IntervalDtype):
        from pandas import IntervalIndex as klass
    elif isinstance(dtype, PeriodDtype):
        from pandas import PeriodIndex as klass
    elif is_float_dtype(dtype):
        from pandas import Float64Index as klass
    elif is_unsigned_integer_dtype(dtype):
        from pandas import UInt64Index as klass
    elif is_signed_integer_dtype(dtype):
        from pandas import Int64Index as klass
    elif dtype == object:
        # NB: assuming away MultiIndex
        klass = Index
    else:
        raise NotImplementedError(dtype)

    return klass
|
||
""" | ||
NOTE for new Index creation: | ||
|
||
|
@@ -6048,25 +6095,27 @@ def _maybe_cast_data_without_dtype(subarr): | |
TimedeltaArray, | ||
) | ||
|
||
assert subarr.dtype == object, subarr.dtype | ||
inferred = lib.infer_dtype(subarr, skipna=False) | ||
|
||
if inferred == "integer": | ||
try: | ||
data = _try_convert_to_int_array(subarr, False, None) | ||
return data, data.dtype | ||
return data | ||
except ValueError: | ||
pass | ||
|
||
return subarr, object | ||
return subarr | ||
|
||
elif inferred in ["floating", "mixed-integer-float", "integer-na"]: | ||
# TODO: Returns IntegerArray for integer-na case in the future | ||
return subarr, np.float64 | ||
data = np.asarray(subarr).astype(np.float64) | ||
Review comment: `copy=False`. Reply: updated, and CI is green. |
||
return data | ||
|
||
elif inferred == "interval": | ||
try: | ||
data = IntervalArray._from_sequence(subarr, copy=False) | ||
return data, data.dtype | ||
return data | ||
except ValueError: | ||
# GH27172: mixed closed Intervals --> object dtype | ||
pass | ||
|
@@ -6077,7 +6126,7 @@ def _maybe_cast_data_without_dtype(subarr): | |
if inferred.startswith("datetime"): | ||
try: | ||
data = DatetimeArray._from_sequence(subarr, copy=False) | ||
return data, data.dtype | ||
return data | ||
except (ValueError, OutOfBoundsDatetime): | ||
# GH 27011 | ||
# If we have mixed timezones, just send it | ||
|
@@ -6086,15 +6135,15 @@ def _maybe_cast_data_without_dtype(subarr): | |
|
||
elif inferred.startswith("timedelta"): | ||
data = TimedeltaArray._from_sequence(subarr, copy=False) | ||
return data, data.dtype | ||
return data | ||
elif inferred == "period": | ||
try: | ||
data = PeriodArray._from_sequence(subarr) | ||
return data, data.dtype | ||
return data | ||
except IncompatibleFrequency: | ||
pass | ||
|
||
return subarr, subarr.dtype | ||
return subarr | ||
|
||
|
||
def _try_convert_to_int_array( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,11 +46,20 @@ class NumericIndex(Index): | |
_can_hold_strings = False | ||
|
||
def __new__(cls, data=None, dtype=None, copy=False, name=None): | ||
cls._validate_dtype(dtype) | ||
name = maybe_extract_name(name, data, cls) | ||
|
||
# Coerce to ndarray if not already ndarray or Index | ||
subarr = cls._ensure_array(data, dtype, copy) | ||
return cls._simple_new(subarr, name=name) | ||
|
||
@classmethod | ||
def _ensure_array(cls, data, dtype, copy: bool): | ||
""" | ||
Ensure we have a valid array to pass to _simple_new. | ||
""" | ||
cls._validate_dtype(dtype) | ||
|
||
if not isinstance(data, (np.ndarray, Index)): | ||
# Coerce to ndarray if not already ndarray or Index | ||
Review thread: (1) Can use `_ensure_array` on L81. (2) L81 is inside `_ensure_array`. (3) Oh, I see; I guess I meant: can you use the superclass version? (4) There's really only one line that gets shared; I don't think it's worth it (for now at least). |
||
if is_scalar(data): | ||
raise cls._scalar_data_error(data) | ||
|
||
|
@@ -74,7 +83,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None): | |
raise ValueError("Index data must be 1-dimensional") | ||
|
||
subarr = np.asarray(subarr) | ||
return cls._simple_new(subarr, name=name) | ||
return subarr | ||
|
||
@classmethod | ||
def _validate_dtype(cls, dtype: Dtype) -> None: | ||
|
Uh oh!
There was an error while loading. Please reload this page.