Skip to content

Commit 57c4940

Browse files
ENH: support na_action in SparseArray.map (#52096)
* ENH: make SparseArray.map support na_action
* add gh number
* avoid Series conversion & extra array lookup
* fix pre-commit
* Update pandas/tests/extension/test_sparse.py
  Co-authored-by: Matthew Roeschke <[email protected]>
* Update pandas/tests/extension/test_sparse.py
  Co-authored-by: Matthew Roeschke <[email protected]>
* fix comment

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 673f023 commit 57c4940

File tree

3 files changed

+37
-22
lines changed

3 files changed

+37
-22
lines changed

doc/source/whatsnew/v2.1.0.rst

+2-1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ Other enhancements
3636
- :class:`api.extensions.ExtensionArray` now has a :meth:`~api.extensions.ExtensionArray.map` method (:issue:`51809`)
3737
- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
3838
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
39+
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
3940

4041
.. ---------------------------------------------------------------------------
4142
.. _whatsnew_210.notable_bug_fixes:
@@ -236,7 +237,7 @@ Reshaping
236237

237238
Sparse
238239
^^^^^^
239-
-
240+
- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`)
240241
-
241242

242243
ExtensionArray

pandas/core/arrays/sparse/array.py

+15-14
Original file line number | Diff line number | Diff line change
@@ -1305,23 +1305,24 @@ def map(self, mapper, na_action=None) -> Self:
13051305
IntIndex
13061306
Indices: array([1, 2], dtype=int32)
13071307
"""
1308-
if na_action is not None:
1309-
raise NotImplementedError
1308+
is_map = isinstance(mapper, (abc.Mapping, ABCSeries))
13101309

1311-
# this is used in apply.
1312-
# We get hit since we're an "is_extension_array_dtype" but regular extension
1313-
# types are not hit. This may be worth adding to the interface.
1314-
if isinstance(mapper, ABCSeries):
1315-
mapper = mapper.to_dict()
1310+
fill_val = self.fill_value
13161311

1317-
if isinstance(mapper, abc.Mapping):
1318-
fill_value = mapper.get(self.fill_value, self.fill_value)
1319-
sp_values = [mapper.get(x, None) for x in self.sp_values]
1320-
else:
1321-
fill_value = mapper(self.fill_value)
1322-
sp_values = [mapper(x) for x in self.sp_values]
1312+
if na_action is None or notna(fill_val):
1313+
fill_val = mapper.get(fill_val, fill_val) if is_map else mapper(fill_val)
1314+
1315+
def func(sp_val):
1316+
new_sp_val = mapper.get(sp_val, None) if is_map else mapper(sp_val)
1317+
# check identity and equality because nans are not equal to each other
1318+
if new_sp_val is fill_val or new_sp_val == fill_val:
1319+
msg = "fill value in the sparse values not supported"
1320+
raise ValueError(msg)
1321+
return new_sp_val
1322+
1323+
sp_values = [func(x) for x in self.sp_values]
13231324

1324-
return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)
1325+
return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_val)
13251326

13261327
def to_dense(self) -> np.ndarray:
13271328
"""

pandas/tests/extension/test_sparse.py

+20-7
Original file line number | Diff line number | Diff line change
@@ -351,14 +351,27 @@ def test_equals(self, data, na_value, as_series, box):
351351
self._check_unsupported(data)
352352
super().test_equals(data, na_value, as_series, box)
353353

354+
@pytest.mark.parametrize(
355+
"func, na_action, expected",
356+
[
357+
(lambda x: x, None, SparseArray([1.0, np.nan])),
358+
(lambda x: x, "ignore", SparseArray([1.0, np.nan])),
359+
(str, None, SparseArray(["1.0", "nan"], fill_value="nan")),
360+
(str, "ignore", SparseArray(["1.0", np.nan])),
361+
],
362+
)
363+
def test_map(self, func, na_action, expected):
364+
# GH52096
365+
data = SparseArray([1, np.nan])
366+
result = data.map(func, na_action=na_action)
367+
self.assert_extension_array_equal(result, expected)
368+
354369
@pytest.mark.parametrize("na_action", [None, "ignore"])
355-
def test_map(self, data, na_action):
356-
if na_action is not None:
357-
with pytest.raises(NotImplementedError, match=""):
358-
data.map(lambda x: x, na_action=na_action)
359-
else:
360-
result = data.map(lambda x: x, na_action=na_action)
361-
self.assert_extension_array_equal(result, data)
370+
def test_map_raises(self, data, na_action):
371+
# GH52096
372+
msg = "fill value in the sparse values not supported"
373+
with pytest.raises(ValueError, match=msg):
374+
data.map(lambda x: np.nan, na_action=na_action)
362375

363376

364377
class TestCasting(BaseSparseTests, base.BaseCastingTests):

0 commit comments

Comments
 (0)