Skip to content

PERF: default_index #43623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,7 +947,7 @@ def mode(values, dropna: bool = True) -> Series:
mode : Series
"""
from pandas import Series
import pandas.core.indexes.base as ibase
from pandas.core.indexes.api import default_index

values = _ensure_arraylike(values)
original = values
Expand All @@ -973,7 +973,7 @@ def mode(values, dropna: bool = True) -> Series:

result = _reconstruct_data(npresult, original.dtype, original)
# Ensure index is type stable (should always use int index)
return Series(result, index=ibase.default_index(len(result)))
return Series(result, index=default_index(len(result)))


def rank(
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/arrays/sparse/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,16 +360,18 @@ def density(self) -> float:

@staticmethod
def _prep_index(data, index, columns):
from pandas.core.indexes.api import ensure_index
import pandas.core.indexes.base as ibase
from pandas.core.indexes.api import (
default_index,
ensure_index,
)

N, K = data.shape
if index is None:
index = ibase.default_index(N)
index = default_index(N)
else:
index = ensure_index(index)
if columns is None:
columns = ibase.default_index(K)
columns = default_index(K)
else:
columns = ensure_index(columns)

Expand Down
12 changes: 6 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@
_shared_docs,
)
from pandas.core.indexers import check_key_length
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
DatetimeIndex,
Index,
PeriodIndex,
default_index,
ensure_index,
ensure_index_from_sequences,
)
Expand Down Expand Up @@ -5780,7 +5780,7 @@ class max type
else:
new_obj = self.copy()

new_index = ibase.default_index(len(new_obj))
new_index = default_index(len(new_obj))
if level is not None:
if not isinstance(level, (tuple, list)):
level = [level]
Expand Down Expand Up @@ -6101,7 +6101,7 @@ def drop_duplicates(

result = self[-duplicated]
if ignore_index:
result.index = ibase.default_index(len(result))
result.index = default_index(len(result))

if inplace:
self._update_inplace(result)
Expand Down Expand Up @@ -6317,7 +6317,7 @@ def sort_values( # type: ignore[override]

if ignore_index:
new_data.set_axis(
self._get_block_manager_axis(axis), ibase.default_index(len(indexer))
self._get_block_manager_axis(axis), default_index(len(indexer))
)

result = self._constructor(new_data)
Expand Down Expand Up @@ -8320,7 +8320,7 @@ def explode(
result = DataFrame({c: df[c].explode() for c in columns})
result = df.drop(columns, axis=1).join(result)
if ignore_index:
result.index = ibase.default_index(len(result))
result.index = default_index(len(result))
else:
result.index = self.index.take(result.index)
result = result.reindex(columns=self.columns, copy=False)
Expand Down Expand Up @@ -10286,7 +10286,7 @@ def f(s):
data = data.apply(f, axis=axis)
# Ensure index is type stable (should always use int index)
if data.empty:
data.index = ibase.default_index(0)
data.index = default_index(0)

return data

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@
)
from pandas.core.describe import describe_ndframe
from pandas.core.flags import Flags
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
DatetimeIndex,
Index,
MultiIndex,
PeriodIndex,
RangeIndex,
default_index,
ensure_index,
)
from pandas.core.internals import (
Expand Down Expand Up @@ -4589,7 +4589,7 @@ def sort_index(

if ignore_index:
axis = 1 if isinstance(self, ABCDataFrame) else 0
new_data.set_axis(axis, ibase.default_index(len(indexer)))
new_data.set_axis(axis, default_index(len(indexer)))

result = self._constructor(new_data)

Expand Down Expand Up @@ -5330,7 +5330,7 @@ def sample(
result = self.take(sampled_indices, axis=axis)

if ignore_index:
result.index = ibase.default_index(len(result))
result.index = default_index(len(result))

return result

Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
"union_indexes",
"get_unanimous_names",
"all_indexes_same",
"default_index",
]


Expand Down Expand Up @@ -287,3 +288,8 @@ def all_indexes_same(indexes) -> bool:
itr = iter(indexes)
first = next(itr)
return all(first.equals(index) for index in itr)


def default_index(n: int) -> RangeIndex:
    """
    Build the default integer index ``0, 1, ..., n - 1``.

    Parameters
    ----------
    n : int
        Number of entries; used as the exclusive stop of ``range(0, n)``.

    Returns
    -------
    RangeIndex
        Unnamed (``name=None``) index wrapping ``range(0, n)``.
    """
    # The range is handed to the private ``_simple_new`` constructor instead
    # of going through the public ``RangeIndex(...)`` call.
    # NOTE(review): presumably this skips constructor argument validation,
    # so callers are expected to pass a proper integer -- confirm.
    rng = range(0, n)
    return RangeIndex._simple_new(rng, name=None)
7 changes: 0 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@
DataFrame,
IntervalIndex,
MultiIndex,
RangeIndex,
Series,
)
from pandas.core.arrays import PeriodArray
Expand Down Expand Up @@ -6799,12 +6798,6 @@ def _validate_join_method(method: str) -> None:
raise ValueError(f"do not recognize join method {method}")


def default_index(n: int) -> RangeIndex:
    """
    Build the default integer index ``0, 1, ..., n - 1``.

    Parameters
    ----------
    n : int
        Exclusive stop of the index; the start is always 0.

    Returns
    -------
    RangeIndex
        Unnamed (``name=None``) index created with ``RangeIndex(0, n)``.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid a circular import between the index
    # modules -- confirm.
    from pandas.core.indexes.range import RangeIndex

    return RangeIndex(0, n, name=None)


def maybe_extract_name(name, obj, cls) -> Hashable:
"""
If no name is passed, then extract it from data, validating hashability.
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@
range_to_ndarray,
sanitize_array,
)
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
DatetimeIndex,
Index,
TimedeltaIndex,
default_index,
ensure_index,
get_objs_combined_axis,
union_indexes,
Expand Down Expand Up @@ -502,9 +502,9 @@ def nested_data_to_arrays(
index = _get_names_from_index(data)
elif isinstance(data[0], Categorical):
# GH#38845 hit in test_constructor_categorical
index = ibase.default_index(len(data[0]))
index = default_index(len(data[0]))
else:
index = ibase.default_index(len(data))
index = default_index(len(data))

return arrays, columns, index

Expand Down Expand Up @@ -665,7 +665,7 @@ def _extract_index(data) -> Index:
)
raise ValueError(msg)
else:
index = ibase.default_index(lengths[0])
index = default_index(lengths[0])

# error: Argument 1 to "ensure_index" has incompatible type "Optional[Index]";
# expected "Union[Union[Union[ExtensionArray, ndarray], Index, Series],
Expand Down Expand Up @@ -707,7 +707,7 @@ def reorder_arrays(
def _get_names_from_index(data) -> Index:
has_some_name = any(getattr(s, "name", None) is not None for s in data)
if not has_some_name:
return ibase.default_index(len(data))
return default_index(len(data))

index: list[Hashable] = list(range(len(data)))
count = 0
Expand All @@ -729,12 +729,12 @@ def _get_axes(
# return axes or defaults

if index is None:
index = ibase.default_index(N)
index = default_index(N)
else:
index = ensure_index(index)

if columns is None:
columns = ibase.default_index(K)
columns = default_index(K)
else:
columns = ensure_index(columns)
return index, columns
Expand Down Expand Up @@ -833,7 +833,7 @@ def to_arrays(
stacklevel=4,
)
if columns is None:
columns = ibase.default_index(len(data))
columns = default_index(len(data))
elif len(columns) > len(data):
raise ValueError("len(columns) > len(data)")
elif len(columns) < len(data):
Expand Down Expand Up @@ -890,7 +890,7 @@ def _list_of_series_to_arrays(
for s in data:
index = getattr(s, "index", None)
if index is None:
index = ibase.default_index(len(s))
index = default_index(len(s))

if id(index) in indexer_cache:
indexer = indexer_cache[id(index)]
Expand Down Expand Up @@ -995,7 +995,7 @@ def _validate_or_indexify_columns(
not equal to length of content
"""
if columns is None:
columns = ibase.default_index(len(content))
columns = default_index(len(content))
else:

# Add mask for data which is composed of list of lists
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@
Index,
MultiIndex,
all_indexes_same,
default_index,
ensure_index,
get_objs_combined_axis,
get_unanimous_names,
)
import pandas.core.indexes.base as ibase
from pandas.core.internals import concatenate_managers

if TYPE_CHECKING:
Expand Down Expand Up @@ -619,7 +619,7 @@ def _get_concat_axis(self) -> Index:
if self.bm_axis == 0:
indexes = [x.index for x in self.objs]
elif self.ignore_index:
idx = ibase.default_index(len(self.objs))
idx = default_index(len(self.objs))
return idx
elif self.keys is None:
names: list[Hashable] = [None] * len(self.objs)
Expand All @@ -640,14 +640,14 @@ def _get_concat_axis(self) -> Index:
if has_names:
return Index(names)
else:
return ibase.default_index(len(self.objs))
return default_index(len(self.objs))
else:
return ensure_index(self.keys).set_names(self.names)
else:
indexes = [x.axes[self.axis] for x in self.objs]

if self.ignore_index:
idx = ibase.default_index(sum(len(i) for i in indexes))
idx = default_index(sum(len(i) for i in indexes))
return idx

if self.keys is None:
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
MultiIndex,
PeriodIndex,
TimedeltaIndex,
default_index,
ensure_index,
)
import pandas.core.indexes.base as ibase
Expand Down Expand Up @@ -425,7 +426,7 @@ def __init__(
if index is None:
if not is_list_like(data):
data = [data]
index = ibase.default_index(len(data))
index = default_index(len(data))
elif is_list_like(data):
com.require_length_match(data, index)

Expand Down Expand Up @@ -1424,7 +1425,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if drop:
new_index = ibase.default_index(len(self))
new_index = default_index(len(self))
if level is not None:
if not isinstance(level, (tuple, list)):
level = [level]
Expand Down Expand Up @@ -1745,7 +1746,7 @@ def to_frame(self, name=None) -> DataFrame:
name = self.name
if name is None:
# default to [0], same as we would get with DataFrame(self)
columns = ibase.default_index(1)
columns = default_index(1)
else:
columns = Index([name])
else:
Expand Down Expand Up @@ -3466,7 +3467,7 @@ def sort_values(
)

if ignore_index:
result.index = ibase.default_index(len(sorted_index))
result.index = default_index(len(sorted_index))

if inplace:
self._update_inplace(result)
Expand Down Expand Up @@ -4039,7 +4040,7 @@ def explode(self, ignore_index: bool = False) -> Series:
values, counts = reshape.explode(np.asarray(self._values))

if ignore_index:
index = ibase.default_index(len(values))
index = default_index(len(values))
else:
index = self.index.repeat(counts)

Expand Down