Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 07cbadc

Browse files
mahepe and jreback
authored and committed on May 18, 2019
BUG: groupby.agg returns incorrect results for uint64 cols (#26310) (#26359)
1 parent 3f417b8 commit 07cbadc

File tree

4 files changed: 29 additions (+29) and 4 deletions (-4).
 

‎doc/source/whatsnew/v0.25.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -444,6 +444,7 @@ Groupby/Resample/Rolling
444444
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
445445
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`)
446446
- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`)
447+
- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns (:issue:`26310`)
447448

448449

449450
Reshaping

‎pandas/core/dtypes/common.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
is_named_tuple, is_nested_list_like, is_number, is_re, is_re_compilable,
2020
is_scalar, is_sequence, is_string_like)
2121

22+
from pandas._typing import ArrayLike
23+
2224
_POSSIBLY_CAST_DTYPES = {np.dtype(t).name
2325
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
2426
'int32', 'uint32', 'int64', 'uint64']}
@@ -87,10 +89,10 @@ def ensure_categorical(arr):
8789
return arr
8890

8991

90-
def ensure_int64_or_float64(arr, copy=False):
92+
def ensure_int_or_float(arr: ArrayLike, copy=False) -> np.array:
9193
"""
9294
Ensure that an dtype array of some integer dtype
93-
has an int64 dtype if possible
95+
has an int64 dtype if possible.
9496
If it's not possible, potentially because of overflow,
9597
convert the array to float64 instead.
9698
@@ -107,9 +109,18 @@ def ensure_int64_or_float64(arr, copy=False):
107109
out_arr : The input array cast as int64 if
108110
possible without overflow.
109111
Otherwise the input array cast to float64.
112+
113+
Notes
114+
-----
115+
If the array is explicitly of type uint64 the type
116+
will remain unchanged.
110117
"""
111118
try:
112119
return arr.astype('int64', copy=copy, casting='safe')
120+
except TypeError:
121+
pass
122+
try:
123+
return arr.astype('uint64', copy=copy, casting='safe')
113124
except TypeError:
114125
return arr.astype('float64', copy=copy)
115126

‎pandas/core/groupby/ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.util._decorators import cache_readonly
1818

1919
from pandas.core.dtypes.common import (
20-
ensure_float64, ensure_int64, ensure_int64_or_float64, ensure_object,
20+
ensure_float64, ensure_int64, ensure_int_or_float, ensure_object,
2121
ensure_platform_int, is_bool_dtype, is_categorical_dtype, is_complex_dtype,
2222
is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype,
2323
is_timedelta64_dtype, needs_i8_conversion)
@@ -486,7 +486,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
486486
if (values == iNaT).any():
487487
values = ensure_float64(values)
488488
else:
489-
values = ensure_int64_or_float64(values)
489+
values = ensure_int_or_float(values)
490490
elif is_numeric and not is_complex_dtype(values):
491491
values = ensure_float64(values)
492492
else:

‎pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,3 +313,16 @@ def test_order_aggregate_multiple_funcs():
313313
expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min'])
314314

315315
tm.assert_index_equal(result, expected)
316+
317+
318+
@pytest.mark.parametrize('dtype', [np.int64, np.uint64])
319+
@pytest.mark.parametrize('how', ['first', 'last', 'min',
320+
'max', 'mean', 'median'])
321+
def test_uint64_type_handling(dtype, how):
322+
# GH 26310
323+
df = pd.DataFrame({'x': 6903052872240755750, 'y': [1, 2]})
324+
expected = df.groupby('y').agg({'x': how})
325+
df.x = df.x.astype(dtype)
326+
result = df.groupby('y').agg({'x': how})
327+
result.x = result.x.astype(np.int64)
328+
tm.assert_frame_equal(result, expected, check_exact=True)

0 commit comments

Comments
 (0)
Please sign in to comment.