Skip to content

REF: move Series-specific methods from NDFrame #40776

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3424,7 +3424,7 @@ def __getitem__(self, key):
# - we have a MultiIndex on columns (test on self.columns, #21309)
if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex):
# GH#26490 using data[key] can cause RecursionError
data = data._get_item_cache(key)
return data._get_item_cache(key)

return data

Expand Down Expand Up @@ -3807,6 +3807,43 @@ def _box_col_values(self, values, loc: int) -> Series:
klass = self._constructor_sliced
return klass(values, index=self.index, name=name, fastpath=True)

# ----------------------------------------------------------------------
# Lookup Caching

def _clear_item_cache(self) -> None:
    """Remove every entry currently held in ``self._item_cache``."""
    item_cache = self._item_cache
    item_cache.clear()

def _get_item_cache(self, item: Hashable) -> Series:
"""
Return the cached item, item represents a label indexer.

The lookup goes through ``self._item_cache``. When ``cache.get(item)``
yields None, the column position is resolved with
``self.columns.get_loc``, the values are fetched with ``self._mgr.iget``
and boxed through ``_box_col_values`` followed by ``__finalize__``.
"""
# NOTE(review): indentation is absent in this capture, so the exact extent
# of the ``if res is None`` body below cannot be read off from here --
# confirm against the real file before relying on the nesting.
cache = self._item_cache
res = cache.get(item)
if res is None:
# All places that call _get_item_cache have unique columns,
# pending resolution of GH#33047

loc = self.columns.get_loc(item)
values = self._mgr.iget(loc)
res = self._box_col_values(values, loc).__finalize__(self)

# store the boxed result and give it a back-reference to this object
cache[item] = res
res._set_as_cached(item, self)

# for a chain
# (the result takes over this object's ``_is_copy`` marker)
res._is_copy = self._is_copy
return res

def _reset_cacher(self) -> None:
    """No-op for DataFrame."""
    return None

def _maybe_cache_changed(self, item, value: Series) -> None:
    """
    Handle a callback from an object reporting that it may have changed.

    The position of ``item`` is looked up on ``self._info_axis`` and
    ``value._values`` is written into ``self._mgr`` at that position.
    """
    position = self._info_axis.get_loc(item)
    new_values = value._values
    self._mgr.iset(position, new_values)

# ----------------------------------------------------------------------
# Unsorted

Expand Down
85 changes: 3 additions & 82 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,8 +876,6 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
def pop(self, item: Hashable) -> Union[Series, Any]:
result = self[item]
del self[item]
if self.ndim == 2:
result._reset_cacher()

return result

Expand Down Expand Up @@ -3519,46 +3517,12 @@ def to_csv(
# ----------------------------------------------------------------------
# Lookup Caching

@final
def _set_as_cached(self, item, cacher) -> None:
"""
Set the _cacher attribute on the calling object with a weakref to
cacher.
"""
self._cacher = (item, weakref.ref(cacher))

@final
def _reset_cacher(self) -> None:
"""
Reset the cacher.
"""
if hasattr(self, "_cacher"):
del self._cacher

@final
def _maybe_cache_changed(self, item, value) -> None:
"""
The object has called back to us saying maybe it has changed.
"""
loc = self._info_axis.get_loc(item)
arraylike = value._values
self._mgr.iset(loc, arraylike)

@final
@property
def _is_cached(self) -> bool_t:
"""Return boolean indicating if self is cached or not."""
return getattr(self, "_cacher", None) is not None

@final
def _get_cacher(self):
"""return my cacher or None"""
cacher = getattr(self, "_cacher", None)
if cacher is not None:
cacher = cacher[1]()
return cacher
raise AbstractMethodError(self)

@final
def _maybe_update_cacher(
self, clear: bool_t = False, verify_is_copy: bool_t = True
) -> None:
Expand All @@ -3573,32 +3537,15 @@ def _maybe_update_cacher(
verify_is_copy : bool, default True
Provide is_copy checks.
"""
cacher = getattr(self, "_cacher", None)
if cacher is not None:
ref = cacher[1]()

# we are trying to reference a dead referent, hence
# a copy
if ref is None:
del self._cacher
else:
if len(self) == len(ref):
# otherwise, either self or ref has swapped in new arrays
ref._maybe_cache_changed(cacher[0], self)
else:
# GH#33675 we have swapped in a new array, so parent
# reference to self is now invalid
ref._item_cache.pop(cacher[0], None)

if verify_is_copy:
self._check_setitem_copy(stacklevel=5, t="referent")

if clear:
self._clear_item_cache()

@final
def _clear_item_cache(self) -> None:
self._item_cache.clear()
raise AbstractMethodError(self)

# ----------------------------------------------------------------------
# Indexing Methods
Expand Down Expand Up @@ -3894,26 +3841,6 @@ class animal locomotion
def __getitem__(self, item):
raise AbstractMethodError(self)

@final
def _get_item_cache(self, item):
"""Return the cached item, item represents a label indexer."""
cache = self._item_cache
res = cache.get(item)
if res is None:
# All places that call _get_item_cache have unique columns,
# pending resolution of GH#33047

loc = self.columns.get_loc(item)
values = self._mgr.iget(loc)
res = self._box_col_values(values, loc).__finalize__(self)

cache[item] = res
res._set_as_cached(item, self)

# for a chain
res._is_copy = self._is_copy
return res

def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
"""
Construct a slice of this container.
Expand All @@ -3939,7 +3866,6 @@ def _set_is_copy(self, ref: FrameOrSeries, copy: bool_t = True) -> None:
assert ref is not None
self._is_copy = weakref.ref(ref)

@final
def _check_is_chained_assignment_possible(self) -> bool_t:
"""
Check if we are a view, have a cacher, and are of mixed type.
Expand All @@ -3951,12 +3877,7 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
single-dtype meaning that the cacher should be updated following
setting.
"""
if self._is_view and self._is_cached:
ref = self._get_cacher()
if ref is not None and ref._is_mixed_type:
self._check_setitem_copy(stacklevel=4, t="referent", force=True)
return True
elif self._is_copy:
if self._is_copy:
self._check_setitem_copy(stacklevel=4, t="referent")
return False

Expand Down
72 changes: 72 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
overload,
)
import warnings
import weakref

import numpy as np

Expand Down Expand Up @@ -1145,6 +1146,77 @@ def _set_value(self, label, value, takeable: bool = False):

self._set_values(loc, value)

# ----------------------------------------------------------------------
# Lookup Caching

@property
def _is_cached(self) -> bool:
    """Tell whether a ``_cacher`` attribute is set on self and is not None."""
    cacher = getattr(self, "_cacher", None)
    return cacher is not None

def _get_cacher(self):
    """Return the object my ``_cacher`` entry refers to, or None."""
    entry = getattr(self, "_cacher", None)
    if entry is None:
        return None
    # the second element is a callable reference; calling it resolves it
    return entry[1]()

def _reset_cacher(self) -> None:
    """Drop the ``_cacher`` attribute when one is present."""
    if not hasattr(self, "_cacher"):
        return
    # should only get here with self.ndim == 1
    del self._cacher

def _set_as_cached(self, item, cacher) -> None:
    """
    Store ``(item, weakref.ref(cacher))`` as the ``_cacher`` attribute.

    Only a weak reference to ``cacher`` is kept.
    """
    cacher_ref = weakref.ref(cacher)
    self._cacher = (item, cacher_ref)

def _clear_item_cache(self) -> None:
    """No-op for Series."""
    return None

def _check_is_chained_assignment_possible(self) -> bool:
"""
See NDFrame._check_is_chained_assignment_possible.__doc__

This override first handles the case where ``self._is_view`` and
``self._is_cached`` both hold: the cacher is fetched and, when it is not
None and reports ``_is_mixed_type``, ``_check_setitem_copy`` is called
with ``force=True``. The final statement defers to the parent
implementation through ``super()``.
"""
# NOTE(review): indentation is absent in this capture, so whether
# ``return True`` belongs to the outer or the inner ``if`` cannot be read
# off from here -- confirm against the real file.
if self._is_view and self._is_cached:
# resolve the weakly-referenced cacher; it may be None
ref = self._get_cacher()
if ref is not None and ref._is_mixed_type:
self._check_setitem_copy(stacklevel=4, t="referent", force=True)
return True
return super()._check_is_chained_assignment_possible()

def _maybe_update_cacher(
self, clear: bool = False, verify_is_copy: bool = True
) -> None:
"""
See NDFrame._maybe_update_cacher.__doc__

This override handles the ``_cacher`` entry, if one is set, before
forwarding ``clear`` and ``verify_is_copy`` unchanged to the parent
implementation through ``super()``.
"""
# NOTE(review): indentation is absent in this capture; the nesting of the
# branches below should be confirmed against the real file.
cacher = getattr(self, "_cacher", None)
if cacher is not None:
assert self.ndim == 1
# cacher[1] is called to obtain the referent; the result is compared
# against None right below, so ``ref`` is not always a DataFrame
ref: DataFrame = cacher[1]()

# we are trying to reference a dead referent, hence
# a copy
if ref is None:
del self._cacher
else:
if len(self) == len(ref):
# otherwise, either self or ref has swapped in new arrays
# (cacher[0] is the item key passed back to the referent)
ref._maybe_cache_changed(cacher[0], self)
else:
# GH#33675 we have swapped in a new array, so parent
# reference to self is now invalid
ref._item_cache.pop(cacher[0], None)

# hand both flags to the parent implementation unchanged
super()._maybe_update_cacher(clear=clear, verify_is_copy=verify_is_copy)

# ----------------------------------------------------------------------
# Unsorted

Expand Down