Skip to content

CLN core.groupby #29389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,16 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
new_res = colg.aggregate(arg)
except (TypeError, DataError):
pass
except ValueError:
except ValueError as err:
# cannot aggregate
continue
if "Must produce aggregated value" in str(err):
# raised directly in _aggregate_named
pass
elif "no results" in str(err):
# raised directly in _aggregate_multiple_funcs
pass
else:
raise
else:
results.append(new_res)
keys.append(col)
Expand Down
12 changes: 4 additions & 8 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def aggregate(self, func=None, *args, **kwargs):
if isinstance(func, str):
return getattr(self, func)(*args, **kwargs)

if isinstance(func, abc.Iterable):
elif isinstance(func, abc.Iterable):
# Catch instances of lists / tuples
# but not the class list / tuple itself.
func = _maybe_mangle_lambdas(func)
Expand All @@ -261,8 +261,6 @@ def aggregate(self, func=None, *args, **kwargs):

try:
return self._python_agg_general(func, *args, **kwargs)
except (AssertionError, TypeError):
raise
except (ValueError, KeyError, AttributeError, IndexError):
# TODO: IndexError can be removed here following GH#29106
# TODO: AttributeError is caused by _index_data hijinx in
Expand Down Expand Up @@ -325,7 +323,7 @@ def _aggregate_multiple_funcs(self, arg, _level):
if name in results:
raise SpecificationError(
"Function names must be unique, found multiple named "
"{}".format(name)
"{name}".format(name=name)
)

# reset the cache so that we
Expand Down Expand Up @@ -1464,8 +1462,6 @@ def _transform_item_by_item(self, obj, wrapper):
for i, col in enumerate(obj):
try:
output[col] = self[col].transform(wrapper)
except AssertionError:
raise
except TypeError:
# e.g. trying to call nanmean with string values
pass
Expand Down Expand Up @@ -1538,8 +1534,8 @@ def filter(self, func, dropna=True, *args, **kwargs):
else:
# non scalars aren't allowed
raise TypeError(
"filter function returned a %s, "
"but expected a scalar bool" % type(res).__name__
"filter function returned a {typ}, "
"but expected a scalar bool".format(typ=type(res).__name__)
)

return self._apply_filter(indices, dropna)
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def __init__(
self,
obj: NDFrame,
keys=None,
axis=0,
axis: int = 0,
level=None,
grouper=None,
exclusions=None,
Expand Down Expand Up @@ -561,7 +561,9 @@ def __getattr__(self, attr):
return self[attr]

raise AttributeError(
"%r object has no attribute %r" % (type(self).__name__, attr)
"'{typ}' object has no attribute '{attr}'".format(
typ=type(self).__name__, attr=attr
)
)

@Substitution(
Expand Down Expand Up @@ -2486,6 +2488,6 @@ def groupby(obj, by, **kwds):

klass = DataFrameGroupBy
else:
raise TypeError("invalid type: {}".format(obj))
raise TypeError("invalid type: {obj}".format(obj=obj))

return klass(obj, by, **kwds)
28 changes: 19 additions & 9 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ def _set_grouper(self, obj, sort=False):
ax = self._grouper.take(obj.index)
else:
if key not in obj._info_axis:
raise KeyError("The grouper name {0} is not found".format(key))
raise KeyError(
"The grouper name {key} is not found".format(key=key)
)
ax = Index(obj[key], name=key)

else:
Expand All @@ -188,7 +190,9 @@ def _set_grouper(self, obj, sort=False):

else:
if level not in (0, ax.name):
raise ValueError("The level {0} is not valid".format(level))
raise ValueError(
"The level {level} is not valid".format(level=level)
)

# possibly sort
if (self.sort or sort) and not ax.is_monotonic:
Expand Down Expand Up @@ -278,7 +282,9 @@ def __init__(
if level is not None:
if not isinstance(level, int):
if level not in index.names:
raise AssertionError("Level {} not in index".format(level))
raise AssertionError(
"Level {level} not in index".format(level=level)
)
level = index.names.index(level)

if self.name is None:
Expand Down Expand Up @@ -344,15 +350,17 @@ def __init__(
):
if getattr(self.grouper, "ndim", 1) != 1:
t = self.name or str(type(self.grouper))
raise ValueError("Grouper for '{}' not 1-dimensional".format(t))
raise ValueError("Grouper for '{t}' not 1-dimensional".format(t=t))
self.grouper = self.index.map(self.grouper)
if not (
hasattr(self.grouper, "__len__")
and len(self.grouper) == len(self.index)
):
errmsg = (
"Grouper result violates len(labels) == "
"len(data)\nresult: %s" % pprint_thing(self.grouper)
"len(data)\nresult: {grper}".format(
grper=pprint_thing(self.grouper)
)
)
self.grouper = None # Try for sanity
raise AssertionError(errmsg)
Expand Down Expand Up @@ -426,7 +434,7 @@ def groups(self):
def _get_grouper(
obj: NDFrame,
key=None,
axis=0,
axis: int = 0,
level=None,
sort=True,
observed=False,
Expand Down Expand Up @@ -493,7 +501,9 @@ def _get_grouper(
if isinstance(level, str):
if obj.index.name != level:
raise ValueError(
"level name {} is not the name of the index".format(level)
"level name {level} is not the name of the index".format(
level=level
)
)
elif level > 0 or level < -1:
raise ValueError("level > 0 or level < -1 only valid with MultiIndex")
Expand Down Expand Up @@ -582,7 +592,7 @@ def _get_grouper(
exclusions = []

# if the actual grouper should be obj[key]
def is_in_axis(key):
def is_in_axis(key) -> bool:
if not _is_label_like(key):
items = obj._data.items
try:
Expand All @@ -594,7 +604,7 @@ def is_in_axis(key):
return True

# if the grouper is obj[name]
def is_in_obj(gpr):
def is_in_obj(gpr) -> bool:
if not hasattr(gpr, "name"):
return False
try:
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import collections
from typing import List, Optional

import numpy as np

Expand Down Expand Up @@ -385,7 +386,7 @@ def get_func(fname):

return func

def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
def _cython_operation(self, kind: str, values, how, axis, min_count=-1, **kwargs):
assert kind in ["transform", "aggregate"]
orig_values = values

Expand All @@ -398,16 +399,18 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
# categoricals are only 1d, so we
# are not setup for dim transforming
if is_categorical_dtype(values) or is_sparse(values):
raise NotImplementedError("{} dtype not supported".format(values.dtype))
raise NotImplementedError(
"{dtype} dtype not supported".format(dtype=values.dtype)
)
elif is_datetime64_any_dtype(values):
if how in ["add", "prod", "cumsum", "cumprod"]:
raise NotImplementedError(
"datetime64 type does not support {} operations".format(how)
"datetime64 type does not support {how} operations".format(how=how)
)
elif is_timedelta64_dtype(values):
if how in ["prod", "cumprod"]:
raise NotImplementedError(
"timedelta64 type does not support {} operations".format(how)
"timedelta64 type does not support {how} operations".format(how=how)
)

if is_datetime64tz_dtype(values.dtype):
Expand Down Expand Up @@ -513,7 +516,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
result = result[:, 0]

if how in self._name_functions:
names = self._name_functions[how]()
names = self._name_functions[how]() # type: Optional[List[str]]
else:
names = None

Expand Down
12 changes: 5 additions & 7 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
result = grouped._aggregate_item_by_item(how, *args, **kwargs)
else:
result = grouped.aggregate(how, *args, **kwargs)
except AssertionError:
raise
except DataError:
# we have a non-reducing function; try to evaluate
result = grouped.apply(how, *args, **kwargs)
Expand Down Expand Up @@ -1450,7 +1448,7 @@ def _get_resampler(self, obj, kind=None):
raise TypeError(
"Only valid with DatetimeIndex, "
"TimedeltaIndex or PeriodIndex, "
"but got an instance of %r" % type(ax).__name__
"but got an instance of '{typ}'".format(typ=type(ax).__name__)
)

def _get_grouper(self, obj, validate=True):
Expand All @@ -1463,7 +1461,7 @@ def _get_time_bins(self, ax):
if not isinstance(ax, DatetimeIndex):
raise TypeError(
"axis must be a DatetimeIndex, but got "
"an instance of %r" % type(ax).__name__
"an instance of {typ}".format(typ=type(ax).__name__)
)

if len(ax) == 0:
Expand Down Expand Up @@ -1539,7 +1537,7 @@ def _get_time_delta_bins(self, ax):
if not isinstance(ax, TimedeltaIndex):
raise TypeError(
"axis must be a TimedeltaIndex, but got "
"an instance of %r" % type(ax).__name__
"an instance of {typ}".format(typ=type(ax).__name__)
)

if not len(ax):
Expand All @@ -1564,7 +1562,7 @@ def _get_time_period_bins(self, ax):
if not isinstance(ax, DatetimeIndex):
raise TypeError(
"axis must be a DatetimeIndex, but got "
"an instance of %r" % type(ax).__name__
"an instance of {typ}".format(typ=type(ax).__name__)
)

freq = self.freq
Expand All @@ -1586,7 +1584,7 @@ def _get_period_bins(self, ax):
if not isinstance(ax, PeriodIndex):
raise TypeError(
"axis must be a PeriodIndex, but got "
"an instance of %r" % type(ax).__name__
"an instance of {typ}".format(typ=type(ax).__name__)
)

memb = ax.asfreq(self.freq, how=self.convention)
Expand Down