
BUG: process Int64 as ints for preservable ops, not as float64 #32652

Closed · wants to merge 1 commit
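For context, a minimal sketch of the behaviour this PR targets, assuming the pre-patch coercion described in the title (the values are illustrative, not taken from the original report):

```python
import pandas as pd

s = pd.Series([1, 2, pd.NA], dtype="Int64")

# With an NA present, the reduction is computed on a float64 copy of the
# data, so the result comes back as a float (e.g. 2.0) rather than an int.
print(s.max(skipna=True))

# After this change, "min"/"max" with skipna=True stay on the int64 buffer
# and the result is returned as a plain Python int (e.g. 2).
```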
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -406,6 +406,7 @@ Other
 - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`)
 - Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
 - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)
+- :meth:`IntegerArray.min` and :meth:`IntegerArray.max` no longer roundtrip through ``np.float64`` values, fixing precision for large integers (:issue:`32652`)

 .. ---------------------------------------------------------------------------

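A hedged illustration of the precision point in the whatsnew entry above; 2**53 + 1 is the smallest positive integer that float64 cannot represent exactly:

```python
import numpy as np
import pandas as pd

# float64 cannot hold 2**53 + 1, so a roundtrip through it rounds the value:
assert int(np.float64(2**53 + 1)) == 2**53

s = pd.Series([2**53 + 1, pd.NA], dtype="Int64")
# With the patched min/max, a skipna=True reduction avoids that roundtrip and
# should give back 9007199254740993 exactly.
print(s.max())
```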
9 changes: 7 additions & 2 deletions pandas/core/arrays/integer.py
@@ -560,10 +560,13 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask

+        preservable_ops = ["min", "max"]
+
         # coerce to a nan-aware float if needed
         # (we explicitly use NaN within reductions)
         if self._hasna:
-            data = self.to_numpy("float64", na_value=np.nan)
+            if name not in preservable_ops or not skipna:
+                data = self.to_numpy("float64", na_value=np.nan)

         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
@@ -577,9 +580,11 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):

         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ["sum", "min", "max", "prod"]:
+        elif name in preservable_ops + ["sum", "prod"]:
             # GH#31409 more performant than casting-then-checking
             result = com.cast_scalar_indexer(result)
+            if isinstance(result, np.integer):
+                result = int(result)

         return result

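For reference, a standalone sketch of the branch added in `_reduce` above (the helper name `uses_float64_path` is hypothetical, not part of the patch): when NA values are present, only `min`/`max` with `skipna=True` keep working on the int64 buffer.

```python
# Hypothetical helper mirroring the condition in _reduce above.
def uses_float64_path(name: str, has_na: bool, skipna: bool) -> bool:
    preservable_ops = ["min", "max"]
    # data is coerced to float64 only when NA is present AND the op cannot
    # (or must not, with skipna=False) be computed on the raw integers
    return has_na and (name not in preservable_ops or not skipna)


assert uses_float64_path("sum", has_na=True, skipna=True)
assert not uses_float64_path("max", has_na=True, skipna=True)
assert uses_float64_path("max", has_na=True, skipna=False)
assert not uses_float64_path("max", has_na=False, skipna=True)
```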
16 changes: 11 additions & 5 deletions pandas/core/nanops.py
@@ -183,11 +183,17 @@ def _get_fill_value(
         if fill_value_typ is None:
             return iNaT
         else:
-            if fill_value_typ == "+inf":
-                # need the max int here
-                return _int64_max
-            else:
-                return iNaT
+            dtype = getattr(dtype, "numpy_dtype", dtype)
+            try:
+                if fill_value_typ == "+inf":
+                    return np.iinfo(dtype).max
+                else:
+                    return np.iinfo(dtype).min
+            except ValueError:
+                if fill_value_typ == "+inf":
+                    return _int64_max
+                else:
+                    return iNaT


 def _maybe_get_mask(
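A short sketch of why the new `_get_fill_value` path works and why the `ValueError` fallback is kept, assuming standard NumPy behaviour (`np.iinfo` only accepts integer dtypes):

```python
import numpy as np
import pandas as pd

# Nullable dtypes expose the underlying NumPy dtype, which is what the
# getattr(dtype, "numpy_dtype", dtype) line above unwraps:
print(pd.Int64Dtype().numpy_dtype)      # int64

# np.iinfo then gives dtype-appropriate fill values instead of a hard-coded
# int64 maximum:
print(np.iinfo(np.int8).max)            # 127
print(np.iinfo(np.int64).min)           # -9223372036854775808

# Non-integer dtypes (datetime64, float64) make np.iinfo raise ValueError,
# which is why the old _int64_max / iNaT behaviour is kept as a fallback:
try:
    np.iinfo(np.dtype("datetime64[ns]"))
except ValueError as exc:
    print(exc)
```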
6 changes: 5 additions & 1 deletion pandas/tests/extension/test_integer.py
@@ -238,7 +238,11 @@ def check_reduce(self, s, op_name, skipna):
         # overwrite to ensure pd.NA is tested instead of np.nan
         # https://github.com/pandas-dev/pandas/issues/30958
         result = getattr(s, op_name)(skipna=skipna)
-        expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
+        preserved_ops = ["min", "max"]
+        if skipna and op_name in preserved_ops:
+            expected = getattr(s.dropna(), op_name)(skipna=True)
+        else:
+            expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
         if np.isnan(expected):
             expected = pd.NA
         tm.assert_almost_equal(result, expected)
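A hedged illustration of the two expected-value paths in the updated `check_reduce` (the series values are illustrative):

```python
import pandas as pd

s = pd.Series([1, None, 2**53 + 1], dtype="Int64")

# min/max with skipna=True are compared against the NA-dropped Int64 result,
# which keeps the exact integer 9007199254740993:
expected_preserved = getattr(s.dropna(), "max")(skipna=True)

# every other op keeps the old float64 baseline, where the same value rounds
# to 9007199254740992.0:
expected_float = getattr(s.astype("float64"), "max")(skipna=True)
```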