Skip to content

Backport PR #30971 on branch 1.0.x (BUG: reductions for nullable dtypes should return pd.NA for skipna=False) #31121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,25 @@ Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead.

a.to_numpy(dtype="float", na_value=np.nan)

**Reductions can return ``pd.NA``**

When performing a reduction such as a sum with ``skipna=False``, the result
will now be ``pd.NA`` instead of ``np.nan`` in the presence of missing values
(:issue:`30958`).

*pandas 0.25.x*

.. code-block:: python

>>> pd.Series(a).sum(skipna=False)
nan

*pandas 1.0.0*

.. ipython:: python

pd.Series(a).sum(skipna=False)

**value_counts returns a nullable integer dtype**

:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,13 +670,15 @@ def _reduce(self, name, skipna=True, **kwargs):
mask = self._mask

# coerce to a nan-aware float if needed
if mask.any():
data = self._data.astype("float64")
data[mask] = np.nan
if self._hasna:
data = self.to_numpy("float64", na_value=np.nan)

op = getattr(nanops, "nan" + name)
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

if np.isnan(result):
return libmissing.NA

# if we have numeric op that would result in an int, coerce to int if possible
if name in ["sum", "prod"] and notna(result):
int_result = np.int64(result)
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
is_scalar,
)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.missing import isna, notna
from pandas.core.dtypes.missing import isna

from pandas.core import nanops, ops
from pandas.core.ops import invalid_comparison
Expand Down Expand Up @@ -549,21 +549,23 @@ def _reduce(self, name, skipna=True, **kwargs):
mask = self._mask

# coerce to a nan-aware float if needed
if mask.any():
data = self._data.astype("float64")
# We explicitly use NaN within reductions.
data[mask] = np.nan
# (we explicitly use NaN within reductions)
if self._hasna:
data = self.to_numpy("float64", na_value=np.nan)

op = getattr(nanops, "nan" + name)
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

if np.isnan(result):
return libmissing.NA

# if we have a boolean op, don't coerce
if name in ["any", "all"]:
pass

# if we have a preservable numeric op,
# provide coercion back to an integer type if possible
elif name in ["sum", "min", "max", "prod"] and notna(result):
elif name in ["sum", "min", "max", "prod"]:
int_result = int(result)
if int_result == result:
result = int_result
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,9 @@ def check_reduce(self, s, op_name, skipna):
result = getattr(s, op_name)(skipna=skipna)
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
# override parent function to cast to bool for min/max
if op_name in ("min", "max") and not pd.isna(expected):
if np.isnan(expected):
expected = pd.NA
elif op_name in ("min", "max"):
expected = bool(expected)
tm.assert_almost_equal(result, expected)

Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/extension/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.dtypes.common import is_extension_array_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array
from pandas.core.arrays.integer import (
Int8Dtype,
Expand Down Expand Up @@ -233,7 +234,14 @@ class TestGroupby(base.BaseGroupbyTests):


class TestNumericReduce(base.BaseNumericReduceTests):
pass
def check_reduce(self, s, op_name, skipna):
# override to ensure pd.NA is tested instead of np.nan
# https://github.com/pandas-dev/pandas/issues/30958
result = getattr(s, op_name)(skipna=skipna)
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
if np.isnan(expected):
expected = pd.NA
tm.assert_almost_equal(result, expected)


class TestBooleanReduce(base.BaseBooleanReduceTests):
Expand Down