Skip to content

ENH: support na_action in SparseArray.map #52096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Other enhancements
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
- Improved error message when using :meth:`DataFrame.merge` with incompatible columns (:issue:`51861`)
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns (:issue:`52084`)
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.notable_bug_fixes:
Expand Down Expand Up @@ -233,7 +234,7 @@ Reshaping

Sparse
^^^^^^
-
- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`)
-

ExtensionArray
Expand Down
29 changes: 15 additions & 14 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,23 +1305,24 @@ def map(self, mapper, na_action=None) -> Self:
IntIndex
Indices: array([1, 2], dtype=int32)
"""
if na_action is not None:
raise NotImplementedError
is_map = isinstance(mapper, (abc.Mapping, ABCSeries))

# this is used in apply.
# We get hit since we're an "is_extension_array_dtype" but regular extension
# types are not hit. This may be worth adding to the interface.
if isinstance(mapper, ABCSeries):
mapper = mapper.to_dict()
fill_val = self.fill_value

if isinstance(mapper, abc.Mapping):
fill_value = mapper.get(self.fill_value, self.fill_value)
sp_values = [mapper.get(x, None) for x in self.sp_values]
else:
fill_value = mapper(self.fill_value)
sp_values = [mapper(x) for x in self.sp_values]
if na_action is None or notna(fill_val):
fill_val = mapper.get(fill_val, fill_val) if is_map else mapper(fill_val)

def func(sp_val):
new_sp_val = mapper.get(sp_val, None) if is_map else mapper(sp_val)
# NaN never compares equal to itself, so test identity as well as equality
if new_sp_val is fill_val or new_sp_val == fill_val:
msg = "fill value in the sparse values not supported"
raise ValueError(msg)
return new_sp_val

sp_values = [func(x) for x in self.sp_values]

return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)
return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_val)

def to_dense(self) -> np.ndarray:
"""
Expand Down
27 changes: 20 additions & 7 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,14 +351,27 @@ def test_equals(self, data, na_value, as_series, box):
self._check_unsupported(data)
super().test_equals(data, na_value, as_series, box)

@pytest.mark.parametrize(
"func, na_action, expected",
[
(lambda x: x, None, SparseArray([1.0, np.nan])),
(lambda x: x, "ignore", SparseArray([1.0, np.nan])),
(str, None, SparseArray(["1.0", "nan"], fill_value="nan")),
(str, "ignore", SparseArray(["1.0", np.nan])),
],
)
def test_map(self, func, na_action, expected):
# GH52096
data = SparseArray([1, np.nan])
result = data.map(func, na_action=na_action)
self.assert_extension_array_equal(result, expected)

@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map(self, data, na_action):
if na_action is not None:
with pytest.raises(NotImplementedError, match=""):
data.map(lambda x: x, na_action=na_action)
else:
result = data.map(lambda x: x, na_action=na_action)
self.assert_extension_array_equal(result, data)
def test_map_raises(self, data, na_action):
# GH52096
msg = "fill value in the sparse values not supported"
with pytest.raises(ValueError, match=msg):
data.map(lambda x: np.nan, na_action=na_action)


class TestCasting(BaseSparseTests, base.BaseCastingTests):
Expand Down