Skip to content

Commit db1344a

Browse files
committed
BUG: process Int64 as ints for preservable ops, not as float64
1 parent f76763f commit db1344a

File tree

4 files changed

+24
-8
lines changed

4 files changed

+24
-8
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ Other
406406
- :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`)
407407
- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
408408
- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)
409+
- :meth:`IntegerArray.min` and :meth:`IntegerArray.max` no longer roundtrip through ``np.float64`` values, fixing precision for large integers (:issue:`32652`)
409410

410411
.. ---------------------------------------------------------------------------
411412

pandas/core/arrays/integer.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -560,10 +560,13 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
560560
data = self._data
561561
mask = self._mask
562562

563+
preservable_ops = ["min", "max"]
564+
563565
# coerce to a nan-aware float if needed
564566
# (we explicitly use NaN within reductions)
565567
if self._hasna:
566-
data = self.to_numpy("float64", na_value=np.nan)
568+
if name not in preservable_ops or not skipna:
569+
data = self.to_numpy("float64", na_value=np.nan)
567570

568571
op = getattr(nanops, "nan" + name)
569572
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
@@ -577,9 +580,11 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
577580

578581
# if we have a preservable numeric op,
579582
# provide coercion back to an integer type if possible
580-
elif name in ["sum", "min", "max", "prod"]:
583+
elif name in preservable_ops + ["sum", "prod"]:
581584
# GH#31409 more performant than casting-then-checking
582585
result = com.cast_scalar_indexer(result)
586+
if isinstance(result, np.integer):
587+
result = int(result)
583588

584589
return result
585590

pandas/core/nanops.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,17 @@ def _get_fill_value(
183183
if fill_value_typ is None:
184184
return iNaT
185185
else:
186-
if fill_value_typ == "+inf":
187-
# need the max int here
188-
return _int64_max
189-
else:
190-
return iNaT
186+
dtype = getattr(dtype, "numpy_dtype", dtype)
187+
try:
188+
if fill_value_typ == "+inf":
189+
return np.iinfo(dtype).max
190+
else:
191+
return np.iinfo(dtype).min
192+
except ValueError:
193+
if fill_value_typ == "+inf":
194+
return _int64_max
195+
else:
196+
return iNaT
191197

192198

193199
def _maybe_get_mask(

pandas/tests/extension/test_integer.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,11 @@ def check_reduce(self, s, op_name, skipna):
238238
# overwrite to ensure pd.NA is tested instead of np.nan
239239
# https://github.com/pandas-dev/pandas/issues/30958
240240
result = getattr(s, op_name)(skipna=skipna)
241-
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
241+
preserved_ops = ["min", "max"]
242+
if skipna and op_name in preserved_ops:
243+
expected = getattr(s.dropna(), op_name)(skipna=True)
244+
else:
245+
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
242246
if np.isnan(expected):
243247
expected = pd.NA
244248
tm.assert_almost_equal(result, expected)

0 commit comments

Comments
 (0)