pandas-dev · mroeschke · Mar 30, 2023 · Feb 24, 2023 · Feb 26, 2023 · Mar 4, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -28,6 +28,9 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter.
+  :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future.
+  Also note that :meth:`Series.map` has default ``na_action=None``, so calling it on a series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`)
 - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
 - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`)
 - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`)
@@ -38,6 +41,8 @@ Other enhancements
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
 - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
+- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
@@ -146,7 +151,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
--
+- Bug in :meth:`Series.map`, where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`).
 -
 
 Datetimelike

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -39,6 +39,7 @@
 
 from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import (
+    is_categorical_dtype,
     is_dict_like,
     is_extension_array_dtype,
     is_list_like,
@@ -1082,7 +1083,11 @@ def apply_standard(self) -> DataFrame | Series:
                 return f(obj)
 
         # row-wise access
-        mapped = obj._map_values(mapper=f, convert=self.convert_dtype)
+        # apply doesn't have a `na_action` keyword and for backward compat reasons
+        # we need to give `na_action="ignore"` for categorical data.
+        # TODO: remove this once the `na_action="ignore"` default has been removed from Categorical.map.
+        action = "ignore" if is_categorical_dtype(obj) else None
+        mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)
 
         if len(mapped) and isinstance(mapped[0], ABCSeries):
             # GH#43986 Need to do list(mapped) in order to get treated as nested

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1205,7 +1205,11 @@ def remove_unused_categories(self) -> Categorical:
 
     # ------------------------------------------------------------------
 
-    def map(self, mapper, na_action=None):
+    def map(
+        self,
+        mapper,
+        na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default,
+    ):
         """
         Map categories using an input mapping or function.
 
@@ -1222,6 +1226,14 @@ def map(self, mapper, na_action=None):
         ----------
         mapper : function, dict, or Series
             Mapping correspondence.
+        na_action : {None, 'ignore'}, default 'ignore'
+            If 'ignore', propagate NaN values, without passing them to the
+            mapping correspondence.
+
+            .. deprecated:: 2.1.0
+
+               The default value of 'ignore' has been deprecated and will be changed to
+               None in the future.
 
         Returns
         -------
@@ -1245,10 +1257,10 @@ def map(self, mapper, na_action=None):
         >>> cat
         ['a', 'b', 'c']
         Categories (3, object): ['a', 'b', 'c']
-        >>> cat.map(lambda x: x.upper())
+        >>> cat.map(lambda x: x.upper(), na_action=None)
         ['A', 'B', 'C']
         Categories (3, object): ['A', 'B', 'C']
-        >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'})
+        >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}, na_action=None)
         ['first', 'second', 'third']
         Categories (3, object): ['first', 'second', 'third']
 
@@ -1259,35 +1271,50 @@ def map(self, mapper, na_action=None):
         >>> cat
         ['a', 'b', 'c']
         Categories (3, object): ['a' < 'b' < 'c']
-        >>> cat.map({'a': 3, 'b': 2, 'c': 1})
+        >>> cat.map({'a': 3, 'b': 2, 'c': 1}, na_action=None)
         [3, 2, 1]
         Categories (3, int64): [3 < 2 < 1]
 
         If the mapping is not one-to-one an :class:`~pandas.Index` is returned:
 
-        >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'})
+        >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}, na_action=None)
         Index(['first', 'second', 'first'], dtype='object')
 
         If a `dict` is used, all unmapped categories are mapped to `NaN` and
         the result is an :class:`~pandas.Index`:
 
-        >>> cat.map({'a': 'first', 'b': 'second'})
+        >>> cat.map({'a': 'first', 'b': 'second'}, na_action=None)
         Index(['first', 'second', nan], dtype='object')
         """
-        if na_action is not None:
-            raise NotImplementedError
+        if na_action is lib.no_default:
+            warnings.warn(
+                "The default value of 'ignore' for the `na_action` parameter in "
+                "pandas.Categorical.map is deprecated and will be "
+                "changed to 'None' in a future version. Please set na_action to the "
+                "desired value to avoid seeing this warning",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            na_action = "ignore"
+
+        assert callable(mapper) or is_dict_like(mapper)
 
         new_categories = self.categories.map(mapper)
-        try:
-            return self.from_codes(
-                self._codes.copy(), categories=new_categories, ordered=self.ordered
-            )
-        except ValueError:
-            # NA values are represented in self._codes with -1
-            # np.take causes NA values to take final element in new_categories
-            if np.any(self._codes == -1):
-                new_categories = new_categories.insert(len(new_categories), np.nan)
-            return np.take(new_categories, self._codes)
+
+        has_nans = np.any(self._codes == -1)
+
+        na_val = np.nan
+        if na_action is None and has_nans:
+            na_val = mapper(np.nan) if callable(mapper) else mapper.get(np.nan, np.nan)
+
+        if new_categories.is_unique and not new_categories.hasnans and na_val is np.nan:
+            new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
+            return self.from_codes(self._codes.copy(), dtype=new_dtype)
+
+        if has_nans:
+            new_categories = new_categories.insert(len(new_categories), na_val)
+
+        return np.take(new_categories, self._codes)
 
     __eq__ = _cat_compare_op(operator.eq)
     __ne__ = _cat_compare_op(operator.ne)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -4,6 +4,7 @@
     TYPE_CHECKING,
     Any,
     Hashable,
+    Literal,
 )
 
 import numpy as np
@@ -402,7 +403,7 @@ def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
         return self.categories._is_comparable_dtype(dtype)
 
-    def map(self, mapper):
+    def map(self, mapper, na_action: Literal["ignore"] | None = None):
         """
         Map values using input an input mapping or function.
 
@@ -469,7 +470,7 @@ def map(self, mapper):
         >>> idx.map({'a': 'first', 'b': 'second'})
         Index(['first', 'second', nan], dtype='object')
         """
-        mapped = self._values.map(mapper)
+        mapped = self._values.map(mapper, na_action=na_action)
         return Index(mapped, name=self.name)
 
     def _concat(self, to_concat: list[Index], name: Hashable) -> Index:

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -15,7 +15,6 @@
 from pandas.errors import SpecificationError
 
 from pandas import (
-    Categorical,
     DataFrame,
     Series,
     date_range,
@@ -76,13 +75,6 @@ def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action):
         s.map({1: 2}, na_action=input_na_action)
 
 
-def test_map_categorical_na_action():
-    values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
-    s = Series(values, name="XX", index=list("abcdefg"))
-    with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
-        s.map(lambda x: x, na_action="ignore")
-
-
 @pytest.mark.parametrize("method", ["apply", "agg", "transform"])
 @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
 def test_nested_renamer(frame_or_series, method, func):

diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
@@ -649,12 +649,15 @@ def test_map_defaultdict_ignore_na():
     tm.assert_series_equal(result, expected)
 
 
-def test_map_categorical_na_ignore():
+@pytest.mark.parametrize(
+    "na_action, expected",
+    [(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))],
+)
+def test_map_categorical_na_ignore(na_action, expected):
     # GH#47527
-    values = pd.Categorical([1, np.nan, 2], categories=[10, 1])
+    values = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2])
     ser = Series(values)
-    result = ser.map({1: 10, np.nan: 42})
-    expected = Series([10, np.nan, np.nan])
+    result = ser.map({1: 10, np.nan: 42}, na_action=na_action)
     tm.assert_series_equal(result, expected)
 
 
@@ -748,22 +751,45 @@ def test_map_box():
     tm.assert_series_equal(res, exp)
 
 
-def test_map_categorical():
+@pytest.mark.parametrize("na_action", [None, "ignore"])
+def test_map_categorical(na_action):
     values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
     s = Series(values, name="XX", index=list("abcdefg"))
 
-    result = s.map(lambda x: x.lower())
+    result = s.map(lambda x: x.lower(), na_action=na_action)
     exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
     exp = Series(exp_values, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
     tm.assert_categorical_equal(result.values, exp_values)
 
-    result = s.map(lambda x: "A")
+    result = s.map(lambda x: "A", na_action=na_action)
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
     assert result.dtype == object
 
 
+@pytest.mark.parametrize(
+    "na_action, expected",
+    (
+        [None, Series(["A", "B", "nan"], name="XX")],
+        [
+            "ignore",
+            Series(
+                ["A", "B", np.nan],
+                name="XX",
+                dtype=pd.CategoricalDtype(list("DCBA"), True),
+            ),
+        ],
+    ),
+)
+def test_map_categorical_na_action(na_action, expected):
+    dtype = pd.CategoricalDtype(list("DCBA"), ordered=True)
+    values = pd.Categorical(list("AB") + [np.nan], dtype=dtype)
+    s = Series(values, name="XX")
+    result = s.map(str, na_action=na_action)
+    tm.assert_series_equal(result, expected)
+
+
 def test_map_datetimetz():
     values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
         "Asia/Tokyo"

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
@@ -300,16 +300,16 @@ def test_memory_usage(self):
 
     def test_map(self):
         c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
-        result = c.map(lambda x: x.lower())
+        result = c.map(lambda x: x.lower(), na_action=None)
         exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
         tm.assert_categorical_equal(result, exp)
 
         c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False)
-        result = c.map(lambda x: x.lower())
+        result = c.map(lambda x: x.lower(), na_action=None)
         exp = Categorical(list("ababc"), categories=list("abc"), ordered=False)
         tm.assert_categorical_equal(result, exp)
 
-        result = c.map(lambda x: 1)
+        result = c.map(lambda x: 1, na_action=None)
         # GH 12766: Return an index not an array
         tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))