Skip to content

REF/TYP: Block._slice and related #45254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 23 additions & 16 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,9 @@ def __repr__(self) -> str:
def __len__(self) -> int:
return len(self.values)

def _slice(self, slicer) -> ArrayLike:
def _slice(
self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
) -> ArrayLike:
"""return a slice of my values"""

return self.values[slicer]
Expand All @@ -319,8 +321,13 @@ def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:

Only supports slices that preserve dimensionality.
"""
axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
new_mgr_locs = self._mgr_locs[axis0_slicer]
# Note: the only place where we are called with ndarray[intp]
# is from internals.concat, and we can verify that never happens
# with 1-column blocks, i.e. never for ExtensionBlock.

# Invalid index type "Union[slice, ndarray[Any, dtype[signedinteger[Any]]]]"
# for "BlockPlacement"; expected type "Union[slice, Sequence[int]]"
new_mgr_locs = self._mgr_locs[slicer] # type: ignore[index]

new_values = self._slice(slicer)

Expand Down Expand Up @@ -1622,43 +1629,43 @@ def take_nd(

return self.make_block_same_class(new_values, new_mgr_locs)

def _slice(self, slicer) -> ExtensionArray:
def _slice(
self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
) -> ExtensionArray:
"""
Return a slice of my values.

Parameters
----------
slicer : slice, ndarray[int], or a tuple of these
slicer : slice, ndarray[int], or ndarray[bool]
Valid (non-reducing) indexer for self.values.

Returns
-------
ExtensionArray
"""
# Note: ndarray[bool] is only reachable via getitem_mgr, which
# is only for Series, i.e. self.ndim == 1.

# return same dims as we currently have
if not isinstance(slicer, tuple) and self.ndim == 2:
if self.ndim == 2:
# reached via getitem_block via _slice_take_blocks_ax0
# TODO(EA2D): won't be necessary with 2D EAs
slicer = (slicer, slice(None))

if isinstance(slicer, tuple) and len(slicer) == 2:
first = slicer[0]
if not isinstance(first, slice):
if not isinstance(slicer, slice):
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", first
"invalid slicing for a 1-ndim ExtensionArray", slicer
)
# GH#32959 only full-slicers along fake-dim0 are valid
# TODO(EA2D): won't be necessary with 2D EAs
# range(1) instead of self._mgr_locs to avoid exception on [::-1]
# see test_iloc_getitem_slice_negative_step_ea_block
new_locs = range(1)[first]
if len(new_locs):
# effectively slice(None)
slicer = slicer[1]
else:
new_locs = range(1)[slicer]
if not len(new_locs):
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", slicer
)
slicer = slice(None)

return self.values[slicer]

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ def _get_mgr_concatenation_plan(mgr: BlockManager):

if not unit_no_ax0_reindexing:
# create block from subset of columns
# Note: Blocks with only 1 column will always have unit_no_ax0_reindexing,
# so we will never get here with ExtensionBlock.
blk = blk.getitem_block(ax0_blk_indexer)

# Assertions disabled for performance
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1805,7 +1805,7 @@ def _blklocs(self):
"""compat with BlockManager"""
return None

def getitem_mgr(self, indexer) -> SingleBlockManager:
def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager:
# similar to get_slice, but not restricted to slice indexer
blk = self._block
array = blk._slice(indexer)
Expand Down
19 changes: 8 additions & 11 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,10 +1024,13 @@ def _get_with(self, key):
# handle the dup indexing case GH#4246
return self.loc[key]

def _get_values_tuple(self, key):
def _get_values_tuple(self, key: tuple):
# mpl hackaround
if com.any_none(*key):
result = self._get_values(key)
# mpl compat if we look up e.g. ser[:, np.newaxis];
# see tests.series.timeseries.test_mpl_compat_hack
# the asarray is needed to avoid returning a 2D DatetimeArray
result = np.asarray(self._values[key])
deprecate_ndim_indexing(result, stacklevel=find_stack_level())
return result

Expand All @@ -1040,15 +1043,9 @@ def _get_values_tuple(self, key):
self
)

def _get_values(self, indexer):
try:
new_mgr = self._mgr.getitem_mgr(indexer)
return self._constructor(new_mgr).__finalize__(self)
except ValueError:
# mpl compat if we look up e.g. ser[:, np.newaxis];
# see tests.series.timeseries.test_mpl_compat_hack
# the asarray is needed to avoid returning a 2D DatetimeArray
return np.asarray(self._values[indexer])
def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
new_mgr = self._mgr.getitem_mgr(indexer)
return self._constructor(new_mgr).__finalize__(self)

def _get_value(self, label, takeable: bool = False):
"""
Expand Down