Skip to content

Commit ca64ba4

Browse files
author
Matias Heikkilä
committed
BUG: groupby.agg returns incorrect results for uint64 cols (#26310)
1 parent 17247ed commit ca64ba4

File tree

4 files changed

+28
-4
lines changed

4 files changed

+28
-4
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ Groupby/Resample/Rolling
403403
- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
404404
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
405405
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`)
406+
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns (:issue:`26310`)
406407

407408

408409
Reshaping

pandas/core/dtypes/common.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas._libs import algos, lib
77
from pandas._libs.tslibs import conversion
8+
from pandas._typing import ArrayLike
89
from pandas.compat import PY36
910

1011
from pandas.core.dtypes.dtypes import (
@@ -87,10 +88,10 @@ def ensure_categorical(arr):
8788
return arr
8889

8990

90-
def ensure_int64_or_float64(arr, copy=False):
91+
def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array:
9192
"""
9293
Ensure that an array of some integer dtype
93-
has an int64 dtype if possible
94+
has an int64 dtype if possible.
9495
If it's not possible, potentially because of overflow,
9596
convert the array to float64 instead.
9697
@@ -107,9 +108,18 @@ def ensure_int64_or_float64(arr, copy=False):
107108
out_arr : The input array cast as int64 if
108109
possible without overflow.
109110
Otherwise the input array cast to uint64 if possible, else to float64.
111+
112+
Notes
113+
-----
114+
If the array is explicitly of type uint64 the type
115+
will remain unchanged.
110116
"""
111117
try:
112118
return arr.astype('int64', copy=copy, casting='safe')
119+
except TypeError:
120+
pass
121+
try:
122+
return arr.astype('uint64', copy=copy, casting='safe')
113123
except TypeError:
114124
return arr.astype('float64', copy=copy)
115125

pandas/core/groupby/ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.util._decorators import cache_readonly
1818

1919
from pandas.core.dtypes.common import (
20-
ensure_float64, ensure_int64, ensure_int64_or_float64, ensure_object,
20+
ensure_float64, ensure_int64, ensure_int_or_float, ensure_object,
2121
ensure_platform_int, is_bool_dtype, is_categorical_dtype, is_complex_dtype,
2222
is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype,
2323
is_timedelta64_dtype, needs_i8_conversion)
@@ -486,7 +486,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
486486
if (values == iNaT).any():
487487
values = ensure_float64(values)
488488
else:
489-
values = ensure_int64_or_float64(values)
489+
values = ensure_int_or_float(values)
490490
elif is_numeric and not is_complex_dtype(values):
491491
values = ensure_float64(values)
492492
else:

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,16 @@ def test_order_aggregate_multiple_funcs():
313313
expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min'])
314314

315315
tm.assert_index_equal(result, expected)
316+
317+
318+
@pytest.mark.parametrize('dtype', [np.int64, np.uint64])
319+
@pytest.mark.parametrize('how', ['first', 'last', 'min',
320+
'max', 'mean', 'median'])
321+
def test_uint64_type_handling(dtype, how):
322+
# GH 26310
323+
df = pd.DataFrame({'x': 6903052872240755750, 'y': [1, 2]})
324+
expected = df.groupby('y').agg({'x': how})
325+
df.x = df.x.astype(dtype)
326+
result = df.groupby('y').agg({'x': how})
327+
result.x = result.x.astype(np.int64)
328+
tm.assert_frame_equal(result, expected, check_exact=True)

0 commit comments

Comments
 (0)