From f8d8d255d65eb878df426ca845b699c78ba1f172 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 13 Apr 2020 17:32:11 -0500 Subject: [PATCH 1/5] ENH: Implement IntegerArray.sum --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/integer.py | 6 ++++++ pandas/tests/arrays/integer/test_function.py | 11 +++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2f4e961ff433f..e9dd6588bfafd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -601,6 +601,7 @@ Other - Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`) - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) - Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) +- :class:`IntegerArray` now implements the ``sum`` operation .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5605b3fbc5dfa..e6247ac8cb5c1 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -573,6 +573,12 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): return result + def sum(self, skipna: bool = True, min_count: int = 0): + result = masked_reductions.sum( + values=self._data, mask=self._mask, skipna=skipna, min_count=min_count + ) + return result + def _maybe_mask_result(self, result, mask, other, op_name: str): """ Parameters diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index bdf902d1aca62..f65402f0d66b7 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -113,6 +113,17 @@ def test_value_counts_empty(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 4]) +def test_integer_array_sum(skipna, min_count): + arr = pd.array([1, 2, 3, None], dtype="Int64") + result = arr.sum(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 6 + else: + assert result is pd.NA + + # TODO(jreback) - these need testing / are broken # shift From 85d66abc9a7cddcf1549034a8b49257d5420c110 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 14 Apr 2020 15:54:07 -0500 Subject: [PATCH 2/5] Update signature and tests --- pandas/core/arrays/integer.py | 15 ++++++++++++++- pandas/tests/arrays/integer/test_function.py | 9 +++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e6247ac8cb5c1..a31fb28a04505 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -7,6 +7,7 @@ from pandas._libs import lib, missing as libmissing from pandas._typing import ArrayLike from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ExtensionDtype @@ -573,7 +574,19 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): return result - def sum(self, skipna: bool = True, min_count: int = 0): + def sum( + self, + axis=None, + dtype=None, + out=None, + keepdims=False, + initial=None, + skipna=True, + min_count=0, + ): + nv.validate_sum( + (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial) + ) result = masked_reductions.sum( values=self._data, mask=self._mask, skipna=skipna, min_count=min_count ) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index f65402f0d66b7..44c3077228e80 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -124,6 +124,15 @@ def test_integer_array_sum(skipna, min_count): assert result is pd.NA +@pytest.mark.parametrize( + "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)] +) +def test_integer_array_numpy_sum(values, expected): + arr = pd.array(values, dtype="Int64") + result = np.sum(arr) + assert result == expected + + # TODO(jreback) - these need testing / are broken # shift From aa7ee1a17ca7330257ad28dceb291e7f49cb2889 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 14 Apr 2020 16:31:28 -0500 Subject: [PATCH 3/5] PR num --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6affacc427b48..c4deea43bad0e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -601,7 +601,7 @@ Other - Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`) - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) - Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) -- :class:`IntegerArray` now implements the ``sum`` operation +- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33538`) .. --------------------------------------------------------------------------- From 5fdb58648a62aa4c4957e7762c415f2439810337 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 23 Apr 2020 14:44:16 -0500 Subject: [PATCH 4/5] Release note --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c4deea43bad0e..515d8d309520d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -601,7 +601,7 @@ Other - Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`) - Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`) - Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`) -- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33538`) +- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`) .. --------------------------------------------------------------------------- From 32a6b4e30581d6a27c48cb8bb3f09ec3e930624b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 24 Apr 2020 15:43:46 -0500 Subject: [PATCH 5/5] numpy compat --- pandas/compat/numpy/function.py | 1 + pandas/core/arrays/integer.py | 15 ++------------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index ccc970fb453c2..42714a450e552 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -252,6 +252,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): STAT_FUNC_DEFAULTS["out"] = None PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +SUM_DEFAULTS["axis"] = None SUM_DEFAULTS["keepdims"] = False SUM_DEFAULTS["initial"] = None diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index a31fb28a04505..9d41071755e6f 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -574,19 +574,8 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): return result - def sum( - self, - axis=None, - dtype=None, - out=None, - keepdims=False, - initial=None, - skipna=True, - min_count=0, - ): - nv.validate_sum( - (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial) - ) + def sum(self, skipna=True, min_count=0, **kwargs): + nv.validate_sum((), kwargs) result = masked_reductions.sum( values=self._data, mask=self._mask, skipna=skipna, min_count=min_count )