-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN: annotations in core.apply #29477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
24dc1c7
ebd68a5
92cbe83
dade0d1
82479f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
import abc | ||
import inspect | ||
from typing import TYPE_CHECKING, Iterator, Type | ||
|
||
import numpy as np | ||
|
||
|
@@ -13,22 +15,25 @@ | |
) | ||
from pandas.core.dtypes.generic import ABCSeries | ||
|
||
if TYPE_CHECKING: | ||
from pandas import DataFrame, Series, Index | ||
|
||
|
||
def frame_apply( | ||
obj, | ||
obj: "DataFrame", | ||
func, | ||
axis=0, | ||
raw=False, | ||
raw: bool = False, | ||
result_type=None, | ||
ignore_failures=False, | ||
ignore_failures: bool = False, | ||
args=None, | ||
kwds=None, | ||
): | ||
""" construct and return a row or column based frame apply object """ | ||
|
||
axis = obj._get_axis_number(axis) | ||
if axis == 0: | ||
klass = FrameRowApply | ||
klass = FrameRowApply # type: Type[FrameApply] | ||
elif axis == 1: | ||
klass = FrameColumnApply | ||
|
||
|
@@ -43,8 +48,38 @@ def frame_apply( | |
) | ||
|
||
|
||
class FrameApply: | ||
def __init__(self, obj, func, raw, result_type, ignore_failures, args, kwds): | ||
class FrameApply(metaclass=abc.ABCMeta): | ||
|
||
# --------------------------------------------------------------- | ||
# Abstract Methods | ||
axis: int | ||
|
||
@property | ||
@abc.abstractmethod | ||
def result_index(self) -> "Index": | ||
pass | ||
|
||
@property | ||
@abc.abstractmethod | ||
def result_columns(self) -> "Index": | ||
pass | ||
|
||
@abc.abstractmethod | ||
def series_generator(self) -> Iterator["Series"]: | ||
pass | ||
|
||
# --------------------------------------------------------------- | ||
|
||
def __init__( | ||
self, | ||
obj: "DataFrame", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we consider making FrameApply generic if we want subclassed DataFrames to return the same type? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand the question. FrameApply is the base class for FrameColumnApply and FrameRowApply. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've not looked in detail, so this may not be relevant here. I'm not sure how geopandas (or other downstream packages) work; I'm just keeping in mind that, say, a GeoDataFrame may return a GeoDataFrame for certain operations. I did start to type geopandas so that I had a better idea; see geopandas/geopandas@master...simonjayhawkins:typing. Note to self: should try to get back to this. |
||
func, | ||
raw: bool, | ||
result_type, | ||
ignore_failures: bool, | ||
args, | ||
kwds, | ||
): | ||
self.obj = obj | ||
self.raw = raw | ||
self.ignore_failures = ignore_failures | ||
|
@@ -76,23 +111,23 @@ def f(x): | |
self.res_columns = None | ||
|
||
@property | ||
def columns(self): | ||
def columns(self) -> "Index": | ||
return self.obj.columns | ||
|
||
@property | ||
def index(self): | ||
def index(self) -> "Index": | ||
return self.obj.index | ||
|
||
@cache_readonly | ||
def values(self): | ||
return self.obj.values | ||
|
||
@cache_readonly | ||
def dtypes(self): | ||
def dtypes(self) -> "Series": | ||
return self.obj.dtypes | ||
|
||
@property | ||
def agg_axis(self): | ||
def agg_axis(self) -> "Index": | ||
return self.obj._get_agg_axis(self.axis) | ||
|
||
def get_result(self): | ||
|
@@ -127,7 +162,7 @@ def get_result(self): | |
|
||
# broadcasting | ||
if self.result_type == "broadcast": | ||
return self.apply_broadcast() | ||
return self.apply_broadcast(self.obj) | ||
|
||
# one axis empty | ||
elif not all(self.obj.shape): | ||
|
@@ -191,7 +226,7 @@ def apply_raw(self): | |
else: | ||
return self.obj._constructor_sliced(result, index=self.agg_axis) | ||
|
||
def apply_broadcast(self, target): | ||
def apply_broadcast(self, target: "DataFrame") -> "DataFrame": | ||
result_values = np.empty_like(target.values) | ||
|
||
# axis which we want to compare compliance | ||
|
@@ -317,19 +352,19 @@ def wrap_results(self): | |
class FrameRowApply(FrameApply): | ||
axis = 0 | ||
|
||
def apply_broadcast(self): | ||
return super().apply_broadcast(self.obj) | ||
def apply_broadcast(self, target: "DataFrame") -> "DataFrame": | ||
return super().apply_broadcast(target) | ||
|
||
@property | ||
def series_generator(self): | ||
return (self.obj._ixs(i, axis=1) for i in range(len(self.columns))) | ||
|
||
@property | ||
def result_index(self): | ||
def result_index(self) -> "Index": | ||
return self.columns | ||
|
||
@property | ||
def result_columns(self): | ||
def result_columns(self) -> "Index": | ||
return self.index | ||
|
||
def wrap_results_for_axis(self): | ||
|
@@ -351,8 +386,8 @@ def wrap_results_for_axis(self): | |
class FrameColumnApply(FrameApply): | ||
axis = 1 | ||
|
||
def apply_broadcast(self): | ||
result = super().apply_broadcast(self.obj.T) | ||
def apply_broadcast(self, target: "DataFrame") -> "DataFrame": | ||
result = super().apply_broadcast(target.T) | ||
return result.T | ||
|
||
@property | ||
|
@@ -364,11 +399,11 @@ def series_generator(self): | |
) | ||
|
||
@property | ||
def result_index(self): | ||
def result_index(self) -> "Index": | ||
return self.index | ||
|
||
@property | ||
def result_columns(self): | ||
def result_columns(self) -> "Index": | ||
return self.columns | ||
|
||
def wrap_results_for_axis(self): | ||
|
@@ -392,7 +427,7 @@ def wrap_results_for_axis(self): | |
|
||
return result | ||
|
||
def infer_to_same_shape(self): | ||
def infer_to_same_shape(self) -> "DataFrame": | ||
""" infer the results to the same shape as the input object """ | ||
results = self.results | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For return types, this could also be typed as `Iterable`. We've done this in a couple of places.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't `Iterator` more informative than `Iterable`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For a return type it's not relevant, so it's just a consistency thing.