From 107968f15533fc6a4adf58f7d80e9b13288be625 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 20 Mar 2023 22:14:06 +0000 Subject: [PATCH 1/7] ENH: make SparseArray.map support na_action --- doc/source/whatsnew/v2.1.0.rst | 3 ++- pandas/core/arrays/sparse/array.py | 18 ++++++++++-------- pandas/tests/extension/test_sparse.py | 27 ++++++++++++++++++++------- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3c18bb9371c33..987df3f670cd9 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -36,6 +36,7 @@ Other enhancements - :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) +- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`xxxxx`). .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: @@ -233,7 +234,7 @@ Reshaping Sparse ^^^^^^ -- +- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 77dcfc463ed0c..151e352eb2820 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1305,22 +1305,24 @@ def map(self, mapper, na_action=None) -> Self: IntIndex Indices: array([1, 2], dtype=int32) """ - if na_action is not None: - raise NotImplementedError - - # this is used in apply. - # We get hit since we're an "is_extension_array_dtype" but regular extension - # types are not hit. This may be worth adding to the interface. if isinstance(mapper, ABCSeries): mapper = mapper.to_dict() + fill_value = self.fill_value + if isinstance(mapper, abc.Mapping): - fill_value = mapper.get(self.fill_value, self.fill_value) + if na_action is None or notna(fill_value): + fill_value = mapper.get(self.fill_value, self.fill_value) sp_values = [mapper.get(x, None) for x in self.sp_values] else: - fill_value = mapper(self.fill_value) + if na_action is None or notna(fill_value): + fill_value = mapper(self.fill_value) sp_values = [mapper(x) for x in self.sp_values] + if fill_value in sp_values: + msg = "fill value in the sparse values not supported" + raise ValueError(msg) + return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value) def to_dense(self) -> np.ndarray: diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index e14de81d6fbd6..47dc4a4e99d6d 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -351,14 +351,27 @@ def test_equals(self, data, na_value, as_series, box): self._check_unsupported(data) super().test_equals(data, na_value, as_series, box) + @pytest.mark.parametrize( + "func, na_action, expected", + [ + (lambda x: x, None, SparseArray([1.0, np.nan])), + (lambda x: x, "ignore", SparseArray([1.0, np.nan])), + (str, None, SparseArray(["1.0", "nan"], fill_value="nan")), + (str, "ignore", SparseArray(["1.0", np.nan])), + ], + ) + def test_map(self, func, na_action, expected): + # GHxxxxx + data = SparseArray([1, np.nan]) + result = data.map(func, na_action=na_action) + self.assert_extension_array_equal(result, expected) + @pytest.mark.parametrize("na_action", [None, "ignore"]) - def test_map(self, data, na_action): - if na_action is not None: - with pytest.raises(NotImplementedError, match=""): - data.map(lambda x: x, na_action=na_action) - else: - result = data.map(lambda x: x, na_action=na_action) - self.assert_extension_array_equal(result, data) + def test_map_raises(self, data, na_action): + # GHxxxxx + msg = "fill value in the sparse values not supported" + with pytest.raises(ValueError, match=msg): + data.map(lambda x: np.nan, na_action=na_action) class TestCasting(BaseSparseTests, base.BaseCastingTests): From dcef4102269d0967498513d4fc4613281a048dcc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 20 Mar 2023 23:50:28 +0000 Subject: [PATCH 2/7] add gh number --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 987df3f670cd9..0df3e48a274f4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -36,7 +36,7 @@ Other enhancements - :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) -- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`xxxxx`). +- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 9af06c26df0b29945d7617ec1892affaa383b731 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 21 Mar 2023 06:43:01 +0000 Subject: [PATCH 3/7] avoid Series conversion & extra array lookup --- pandas/core/arrays/sparse/array.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 151e352eb2820..e95a0cdaf8e4a 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1305,25 +1305,23 @@ def map(self, mapper, na_action=None) -> Self: IntIndex Indices: array([1, 2], dtype=int32) """ - if isinstance(mapper, ABCSeries): - mapper = mapper.to_dict() + is_map = isinstance(mapper, abc.Mapping) or isinstance(mapper, ABCSeries) - fill_value = self.fill_value + fill_val = self.fill_value - if isinstance(mapper, abc.Mapping): - if na_action is None or notna(fill_value): - fill_value = mapper.get(self.fill_value, self.fill_value) - sp_values = [mapper.get(x, None) for x in self.sp_values] - else: - if na_action is None or notna(fill_value): - fill_value = mapper(self.fill_value) - sp_values = [mapper(x) for x in self.sp_values] + if na_action is None or notna(fill_val): + fill_val = mapper.get(fill_val, fill_val) if is_map else mapper(fill_val) + + def func(sp_val): + new_sp_val = mapper.get(sp_val, None) if is_map else mapper(sp_val) + if new_sp_val in [fill_val]: + msg = "fill value in the sparse values not supported" + raise ValueError(msg) + return new_sp_val - if fill_value in sp_values: - msg = "fill value in the sparse values not supported" - raise ValueError(msg) + sp_values = [func(x) for x in self.sp_values] - return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value) + return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_val) def to_dense(self) -> np.ndarray: """ From 8c85b15bac357864445be8960fc2843ab2b5f7dc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 21 Mar 2023 07:23:08 +0000 Subject: [PATCH 4/7] fix pre-commit --- pandas/core/arrays/sparse/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e95a0cdaf8e4a..d19faeacf0779 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1305,7 +1305,7 @@ def map(self, mapper, na_action=None) -> Self: IntIndex Indices: array([1, 2], dtype=int32) """ - is_map = isinstance(mapper, abc.Mapping) or isinstance(mapper, ABCSeries) + is_map = isinstance(mapper, (abc.Mapping, ABCSeries)) fill_val = self.fill_value From 5a152a913a9f9aacbea6c05ee1260052cc52a28e Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 21 Mar 2023 18:30:09 +0000 Subject: [PATCH 5/7] Update pandas/tests/extension/test_sparse.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/extension/test_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 47dc4a4e99d6d..e80840ec4e9d0 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -361,7 +361,7 @@ def test_equals(self, data, na_value, as_series, box): ], ) def test_map(self, func, na_action, expected): - # GHxxxxx + # GH52096 data = SparseArray([1, np.nan]) result = data.map(func, na_action=na_action) self.assert_extension_array_equal(result, expected) From bdd3c13541607a481763b0cdcc58f746a983bce9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 21 Mar 2023 18:30:20 +0000 Subject: [PATCH 6/7] Update pandas/tests/extension/test_sparse.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/extension/test_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index e80840ec4e9d0..842e495144e7d 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -368,7 +368,7 @@ def test_map(self, func, na_action, expected): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map_raises(self, data, na_action): - # GHxxxxx + # GH52096 msg = "fill value in the sparse values not supported" with pytest.raises(ValueError, match=msg): data.map(lambda x: np.nan, na_action=na_action) From 36c7956c98b6628158b29e738f345b482f17551f Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 21 Mar 2023 18:48:17 +0000 Subject: [PATCH 7/7] fix comment --- pandas/core/arrays/sparse/array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index d19faeacf0779..3365ea0f9db7e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1314,7 +1314,8 @@ def map(self, mapper, na_action=None) -> Self: def func(sp_val): new_sp_val = mapper.get(sp_val, None) if is_map else mapper(sp_val) - if new_sp_val in [fill_val]: + # check identity and equality because nans are not equal to each other + if new_sp_val is fill_val or new_sp_val == fill_val: msg = "fill value in the sparse values not supported" raise ValueError(msg) return new_sp_val