Skip to content

ENH: Allow s.map(d, na_action='raise') #60482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
12 changes: 10 additions & 2 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,18 @@ def nselect_method(request):
return request.param


@pytest.fixture(params=[None, "ignore"])
@pytest.fixture(params=[None, "ignore", "raise"])
def na_action(request):
"""
Fixture for 'na_action' argument in map.
Fixture for 'na_action' argument in Series.map.
"""
return request.param


@pytest.fixture(params=[None, "ignore"])
def na_action_frame(request):
    """
    Fixture for 'na_action' argument in DataFrame.map.

    Parametrized over ``None`` and ``"ignore"``.
    """
    return request.param

Expand Down
15 changes: 11 additions & 4 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,7 +1627,7 @@ def union_with_duplicates(
def map_array(
arr: ArrayLike,
mapper,
na_action: Literal["ignore"] | None = None,
na_action: Literal["ignore", "raise"] | None = None,
) -> np.ndarray | ExtensionArray | Index:
"""
Map values using an input mapping or function.
Expand All @@ -1636,9 +1636,12 @@ def map_array(
----------
mapper : function, dict, or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
na_action : {None, 'ignore', 'raise'}, default None
If 'ignore', propagate NA values, without passing them to the
mapping correspondence.
mapping correspondence. If 'raise', an error is raised when the
array contains non-NA values which do not exist as keys in the mapping
correspondence. This check does not apply when the mapper is a function
or a dict-like object that defines ``__missing__``.

Returns
-------
Expand All @@ -1647,7 +1650,7 @@ def map_array(
If the function returns a tuple with more than one element
a MultiIndex will be returned.
"""
if na_action not in (None, "ignore"):
if na_action not in (None, "ignore", "raise"):
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
raise ValueError(msg)

Expand Down Expand Up @@ -1686,6 +1689,10 @@ def map_array(
# Since values were input this means we came from either
# a dict or a series and mapper should be an index
indexer = mapper.index.get_indexer(arr)

if na_action == "raise" and (indexer == -1).any():
raise ValueError("At least one value is not covered in the mapping!")

new_values = take_nd(mapper._values, indexer)

return new_values
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4321,7 +4321,7 @@ def unstack(
def map(
self,
arg: Callable | Mapping | Series,
na_action: Literal["ignore"] | None = None,
na_action: Literal["ignore", "raise"] | None = None,
**kwargs,
) -> Series:
"""
Expand All @@ -4335,9 +4335,11 @@ def map(
----------
arg : function, collections.abc.Mapping subclass or Series
Mapping correspondence.
na_action : {None, 'ignore'}, default None
na_action : {None, 'ignore', 'raise'}, default None
If 'ignore', propagate NaN values, without passing them to the
mapping correspondence.
mapping correspondence. If 'raise', a ``ValueError`` is raised when a
value has no matching key in the mapping correspondence, instead of
that value being replaced with ``NaN``.
**kwargs
Additional keyword arguments to pass as keyword arguments to
`arg`.
Expand All @@ -4359,7 +4361,8 @@ def map(
Notes
-----
When ``arg`` is a dictionary, values in Series that are not in the
dictionary (as keys) are converted to ``NaN``. However, if the
dictionary (as keys) are converted to ``NaN``. Pass
``na_action='raise'`` to get a ``ValueError`` instead. However, if the
dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
provides a method for default values), then this default is used
rather than ``NaN``.
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_map_float_object_conversion(val):
assert result == object


def test_map_keeps_dtype(na_action):
def test_map_keeps_dtype(na_action_frame):
# GH52219
arr = Series(["a", np.nan, "b"])
sparse_arr = arr.astype(pd.SparseDtype(object))
Expand All @@ -42,15 +42,15 @@ def test_map_keeps_dtype(na_action):
def func(x):
return str.upper(x) if not pd.isna(x) else x

result = df.map(func, na_action=na_action)
result = df.map(func, na_action=na_action_frame)

expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
expected_arr = expected_sparse.astype(object)
expected = DataFrame({"a": expected_arr, "b": expected_sparse})

tm.assert_frame_equal(result, expected)

result_empty = df.iloc[:0, :].map(func, na_action=na_action)
result_empty = df.iloc[:0, :].map(func, na_action=na_action_frame)
expected_empty = expected.iloc[:0, :]
tm.assert_frame_equal(result_empty, expected_empty)

Expand Down
43 changes: 33 additions & 10 deletions pandas/tests/series/methods/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,19 @@ def test_map_dict_na_key():
tm.assert_series_equal(result, expected)


def test_map_missing_key(na_action):
    # The value 42 is deliberately absent from the mapping keys.
    ser = Series([1, 2, 42])
    lookup = {1: "a", 2: "b", 3: "c"}

    if na_action != "raise":
        # Without 'raise', the unmatched value is mapped to NaN.
        result = ser.map(lookup, na_action=na_action)
        expected = Series(["a", "b", np.nan])
        tm.assert_series_equal(result, expected)
        return

    # With 'raise', the unmatched value must trigger a ValueError.
    with pytest.raises(ValueError, match="not covered"):
        ser.map(lookup, na_action=na_action)


def test_map_defaultdict_na_key(na_action):
# GH 48813
s = Series([1, 2, np.nan])
Expand Down Expand Up @@ -380,7 +393,7 @@ def test_map_categorical_na_ignore(na_action, expected):
tm.assert_series_equal(result, expected)


def test_map_dict_subclass_with_missing():
def test_map_dict_subclass_with_missing(na_action):
"""
Test Series.map with a dictionary subclass that defines __missing__,
i.e. sets a default value (GH #15999).
Expand All @@ -392,30 +405,40 @@ def __missing__(self, key):

s = Series([1, 2, 3])
dictionary = DictWithMissing({3: "three"})
result = s.map(dictionary)
result = s.map(dictionary, na_action=na_action) # also works with 'raise'
expected = Series(["missing", "missing", "three"])
tm.assert_series_equal(result, expected)


def test_map_dict_subclass_without_missing():
def test_map_dict_subclass_without_missing(na_action):
class DictWithoutMissing(dict):
pass

s = Series([1, 2, 3])
dictionary = DictWithoutMissing({3: "three"})
result = s.map(dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)

if na_action == "raise":
with pytest.raises(ValueError, match="not covered"):
_ = s.map(dictionary, na_action=na_action)
else:
result = s.map(dictionary, na_action=na_action)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)

def test_map_abc_mapping(non_dict_mapping_subclass):

def test_map_abc_mapping(non_dict_mapping_subclass, na_action):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
s = Series([1, 2, 3])
not_a_dictionary = non_dict_mapping_subclass({3: "three"})
result = s.map(not_a_dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)

if na_action == "raise":
with pytest.raises(ValueError, match="not covered"):
_ = s.map(not_a_dictionary, na_action=na_action)
else:
result = s.map(not_a_dictionary, na_action=na_action)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)


def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
Expand Down
Loading