Skip to content

Commit a35bdd1

Browse files
meeseeksmachine and jorisvandenbossche
authored and committed
Backport PR pandas-dev#30971: BUG: reductions for nullable dtypes should return pd.NA for skipna=False (pandas-dev#31121)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent b5357a6 commit a35bdd1

File tree

5 files changed

+44
-11
lines changed

5 files changed

+44
-11
lines changed

doc/source/whatsnew/v1.0.0.rst

+19
Original file line number | Diff line number | Diff line change
@@ -483,6 +483,25 @@ Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead.
483483
484484
a.to_numpy(dtype="float", na_value=np.nan)
485485
486+
**Reductions can return ``pd.NA``**
487+
488+
When performing a reduction such as a sum with ``skipna=False``, the result
489+
will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values
490+
(:issue:`30958`).
491+
492+
*pandas 0.25.x*
493+
494+
.. code-block:: python
495+
496+
>>> pd.Series(a).sum(skipna=False)
497+
nan
498+
499+
*pandas 1.0.0*
500+
501+
.. ipython:: python
502+
503+
pd.Series(a).sum(skipna=False)
504+
486505
**value_counts returns a nullable integer dtype**
487506

488507
:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable

pandas/core/arrays/boolean.py

+5 -3
Original file line number | Diff line number | Diff line change
@@ -670,13 +670,15 @@ def _reduce(self, name, skipna=True, **kwargs):
670670
mask = self._mask
671671

672672
# coerce to a nan-aware float if needed
673-
if mask.any():
674-
data = self._data.astype("float64")
675-
data[mask] = np.nan
673+
if self._hasna:
674+
data = self.to_numpy("float64", na_value=np.nan)
676675

677676
op = getattr(nanops, "nan" + name)
678677
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
679678

679+
if np.isnan(result):
680+
return libmissing.NA
681+
680682
# if we have numeric op that would result in an int, coerce to int if possible
681683
if name in ["sum", "prod"] and notna(result):
682684
int_result = np.int64(result)

pandas/core/arrays/integer.py

+8 -6
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
is_scalar,
2222
)
2323
from pandas.core.dtypes.dtypes import register_extension_dtype
24-
from pandas.core.dtypes.missing import isna, notna
24+
from pandas.core.dtypes.missing import isna
2525

2626
from pandas.core import nanops, ops
2727
from pandas.core.ops import invalid_comparison
@@ -549,21 +549,23 @@ def _reduce(self, name, skipna=True, **kwargs):
549549
mask = self._mask
550550

551551
# coerce to a nan-aware float if needed
552-
if mask.any():
553-
data = self._data.astype("float64")
554-
# We explicitly use NaN within reductions.
555-
data[mask] = np.nan
552+
# (we explicitly use NaN within reductions)
553+
if self._hasna:
554+
data = self.to_numpy("float64", na_value=np.nan)
556555

557556
op = getattr(nanops, "nan" + name)
558557
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
559558

559+
if np.isnan(result):
560+
return libmissing.NA
561+
560562
# if we have a boolean op, don't coerce
561563
if name in ["any", "all"]:
562564
pass
563565

564566
# if we have a preservable numeric op,
565567
# provide coercion back to an integer type if possible
566-
elif name in ["sum", "min", "max", "prod"] and notna(result):
568+
elif name in ["sum", "min", "max", "prod"]:
567569
int_result = int(result)
568570
if int_result == result:
569571
result = int_result

pandas/tests/extension/test_boolean.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -327,7 +327,9 @@ def check_reduce(self, s, op_name, skipna):
327327
result = getattr(s, op_name)(skipna=skipna)
328328
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
329329
# override parent function to cast to bool for min/max
330-
if op_name in ("min", "max") and not pd.isna(expected):
330+
if np.isnan(expected):
331+
expected = pd.NA
332+
elif op_name in ("min", "max"):
331333
expected = bool(expected)
332334
tm.assert_almost_equal(result, expected)
333335

pandas/tests/extension/test_integer.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
from pandas.core.dtypes.common import is_extension_array_dtype
2020

2121
import pandas as pd
22+
import pandas._testing as tm
2223
from pandas.core.arrays import integer_array
2324
from pandas.core.arrays.integer import (
2425
Int8Dtype,
@@ -233,7 +234,14 @@ class TestGroupby(base.BaseGroupbyTests):
233234

234235

235236
class TestNumericReduce(base.BaseNumericReduceTests):
236-
pass
237+
def check_reduce(self, s, op_name, skipna):
238+
# overwrite to ensure pd.NA is tested instead of np.nan
239+
# https://github.com/pandas-dev/pandas/issues/30958
240+
result = getattr(s, op_name)(skipna=skipna)
241+
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
242+
if np.isnan(expected):
243+
expected = pd.NA
244+
tm.assert_almost_equal(result, expected)
237245

238246

239247
class TestBooleanReduce(base.BaseBooleanReduceTests):

0 commit comments

Comments
 (0)