Skip to content

Commit 2e9ca6b

Browse files
author
Matias Heikkilä
committed
BUG: groupby.agg returns incorrect results for uint64 cols (#26310)
1 parent 17247ed commit 2e9ca6b

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ Performance Improvements
258258
Bug Fixes
259259
~~~~~~~~~
260260

261+
- Bug where ``groupby.agg`` (``first``, ``last``, ``min``, etc.) returned incorrect results for ``uint64`` columns (:issue:`26310`)
261262

262263

263264
Categorical

pandas/core/dtypes/common.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,11 @@ def ensure_categorical(arr):
9090
def ensure_int64_or_float64(arr, copy=False):
9191
"""
9292
Ensure that an array of some integer dtype
93-
has an int64 dtype if possible
93+
has an int64 dtype if possible.
9494
If it's not possible, potentially because of overflow,
9595
convert the array to float64 instead.
96+
If the array is explicitly of type uint64 the type
97+
will remain unchanged.
9698
9799
Parameters
98100
----------
@@ -107,11 +109,16 @@ def ensure_int64_or_float64(arr, copy=False):
107109
out_arr : The input array cast as int64 if
108110
possible without overflow.
109111
Otherwise the input array cast to float64.
112+
If the array is explicitly of type uint64 the type
113+
will remain unchanged.
110114
"""
111115
try:
112116
return arr.astype('int64', copy=copy, casting='safe')
113117
except TypeError:
114-
return arr.astype('float64', copy=copy)
118+
try:
119+
return arr.astype('uint64', copy=copy, casting='safe')
120+
except TypeError:
121+
return arr.astype('float64', copy=copy)
115122

116123

117124
def classes(*klasses):

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,14 @@ def test_order_aggregate_multiple_funcs():
313313
expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min'])
314314

315315
tm.assert_index_equal(result, expected)
316+
317+
318+
def test_uint64_type_handling():
    """
    Check that ``groupby.agg`` gives the same values for uint64 and int64.

    The same value is aggregated once while the column is int64 (the
    baseline) and once after the column has been cast to uint64.  The two
    aggregated frames must hold exactly the same values.
    """
    # GH 26310
    df = pd.DataFrame({'x': 6903052872240755750, 'y': [1, 2]})

    # Baseline: aggregate while 'x' is still int64.
    expected = df.groupby('y').agg({'x': 'first'})

    # Same data, but stored as uint64.
    df['x'] = df['x'].astype(np.uint64)
    result = df.groupby('y').agg({'x': 'first'})

    # Cast back so only the values (not the dtype) are being compared;
    # the value fits in int64, so the cast is lossless.
    result['x'] = result['x'].astype(np.int64)

    # check_exact=True: a silent round-trip through float64 would only
    # change the low-order digits, which a tolerant comparison could miss.
    tm.assert_frame_equal(result, expected, check_exact=True)

0 commit comments

Comments
 (0)