diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 38faf90f1de74..5646aa5947bee 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -28,6 +28,9 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter. + :meth:`Categorical.map` implicitly had a default value of ``"ignore"`` for ``na_action``. This has formally been deprecated and will be changed to ``None`` in the future. + Also note that :meth:`Series.map` has default ``na_action=None`` and calls to series with categorical data will now use ``na_action=None`` unless explicitly set otherwise (:issue:`44279`) - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`) - :meth:`MultiIndex.sort_values` now supports ``na_position`` (:issue:`51612`) - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`) @@ -38,7 +41,9 @@ Other enhancements - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`) - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`). +- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`) - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`) +- .. --------------------------------------------------------------------------- .. 
_whatsnew_210.notable_bug_fixes: @@ -149,7 +154,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- Bug in :meth:`Series.map`, where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`). - Datetimelike diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9b1ef4ad8a41b..2ffd49f674cfb 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -39,6 +39,7 @@ from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( + is_categorical_dtype, is_dict_like, is_list_like, is_sequence, @@ -1082,7 +1083,12 @@ def apply_standard(self) -> DataFrame | Series: return f(obj) # row-wise access - mapped = obj._map_values(mapper=f, convert=self.convert_dtype) + # apply doesn't have a `na_action` keyword and for backward compat reasons + # we need to give `na_action="ignore"` for categorical data. + # TODO: remove the `na_action="ignore"` when that default has been changed in + # Categorical (GH51645). + action = "ignore" if is_categorical_dtype(obj) else None + mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) if len(mapped) and isinstance(mapped[0], ABCSeries): # GH#43986 Need to do list(mapped) in order to get treated as nested diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 673058c30664b..0219d97f12e8f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1203,7 +1203,11 @@ def remove_unused_categories(self) -> Categorical: # ------------------------------------------------------------------ - def map(self, mapper, na_action=None): + def map( + self, + mapper, + na_action: Literal["ignore"] | None | lib.NoDefault = lib.no_default, + ): """ Map categories using an input mapping or function. @@ -1220,6 +1224,14 @@ def map(self, mapper, na_action=None): ---------- mapper : function, dict, or Series Mapping correspondence. 
+ na_action : {None, 'ignore'}, default 'ignore' + If 'ignore', propagate NaN values, without passing them to the + mapping correspondence. + + .. deprecated:: 2.1.0 + + The default value of 'ignore' has been deprecated and will be changed to + None in the future. Returns ------- @@ -1243,10 +1255,10 @@ def map(self, mapper, na_action=None): >>> cat ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] - >>> cat.map(lambda x: x.upper()) + >>> cat.map(lambda x: x.upper(), na_action=None) ['A', 'B', 'C'] Categories (3, object): ['A', 'B', 'C'] - >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}, na_action=None) ['first', 'second', 'third'] Categories (3, object): ['first', 'second', 'third'] @@ -1257,35 +1269,50 @@ def map(self, mapper, na_action=None): >>> cat ['a', 'b', 'c'] Categories (3, object): ['a' < 'b' < 'c'] - >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + >>> cat.map({'a': 3, 'b': 2, 'c': 1}, na_action=None) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] If the mapping is not one-to-one an :class:`~pandas.Index` is returned: - >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}, na_action=None) Index(['first', 'second', 'first'], dtype='object') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: - >>> cat.map({'a': 'first', 'b': 'second'}) + >>> cat.map({'a': 'first', 'b': 'second'}, na_action=None) Index(['first', 'second', nan], dtype='object') """ - if na_action is not None: - raise NotImplementedError + if na_action is lib.no_default: + warnings.warn( + "The default value of 'ignore' for the `na_action` parameter in " + "pandas.Categorical.map is deprecated and will be " + "changed to 'None' in a future version. 
Please set na_action to the " + "desired value to avoid seeing this warning", + FutureWarning, + stacklevel=find_stack_level(), + ) + na_action = "ignore" + + assert callable(mapper) or is_dict_like(mapper) new_categories = self.categories.map(mapper) - try: - return self.from_codes( - self._codes.copy(), categories=new_categories, ordered=self.ordered - ) - except ValueError: - # NA values are represented in self._codes with -1 - # np.take causes NA values to take final element in new_categories - if np.any(self._codes == -1): - new_categories = new_categories.insert(len(new_categories), np.nan) - return np.take(new_categories, self._codes) + + has_nans = np.any(self._codes == -1) + + na_val = np.nan + if na_action is None and has_nans: + na_val = mapper(np.nan) if callable(mapper) else mapper.get(np.nan, np.nan) + + if new_categories.is_unique and not new_categories.hasnans and na_val is np.nan: + new_dtype = CategoricalDtype(new_categories, ordered=self.ordered) + return self.from_codes(self._codes.copy(), dtype=new_dtype) + + if has_nans: + new_categories = new_categories.insert(len(new_categories), na_val) + + return np.take(new_categories, self._codes) __eq__ = _cat_compare_op(operator.eq) __ne__ = _cat_compare_op(operator.ne) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 89b1777360bac..fd8d602118f6a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -4,6 +4,7 @@ TYPE_CHECKING, Any, Hashable, + Literal, cast, ) @@ -402,7 +403,7 @@ def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex: def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return self.categories._is_comparable_dtype(dtype) - def map(self, mapper): + def map(self, mapper, na_action: Literal["ignore"] | None = None): """ Map values using input an input mapping or function. 
@@ -469,7 +470,7 @@ def map(self, mapper): >>> idx.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ - mapped = self._values.map(mapper) + mapped = self._values.map(mapper, na_action=na_action) return Index(mapped, name=self.name) def _concat(self, to_concat: list[Index], name: Hashable) -> Index: diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 0207391c3070a..81f85a7b191d4 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -15,7 +15,6 @@ from pandas.errors import SpecificationError from pandas import ( - Categorical, DataFrame, Series, date_range, @@ -76,13 +75,6 @@ def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): s.map({1: 2}, na_action=input_na_action) -def test_map_categorical_na_action(): - values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) - s = Series(values, name="XX", index=list("abcdefg")) - with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN): - s.map(lambda x: x, na_action="ignore") - - @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) def test_nested_renamer(frame_or_series, method, func): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index bd0167701d08b..813ad6197f4e9 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -649,12 +649,15 @@ def test_map_defaultdict_ignore_na(): tm.assert_series_equal(result, expected) -def test_map_categorical_na_ignore(): +@pytest.mark.parametrize( + "na_action, expected", + [(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))], +) +def test_map_categorical_na_ignore(na_action, expected): # GH#47527 - values = pd.Categorical([1, np.nan, 2], categories=[10, 1]) + values = pd.Categorical([1, np.nan, 2], 
categories=[10, 1, 2]) ser = Series(values) - result = ser.map({1: 10, np.nan: 42}) - expected = Series([10, np.nan, np.nan]) + result = ser.map({1: 10, np.nan: 42}, na_action=na_action) tm.assert_series_equal(result, expected) @@ -748,22 +751,45 @@ def test_map_box(): tm.assert_series_equal(res, exp) -def test_map_categorical(): +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_map_categorical(na_action): values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) s = Series(values, name="XX", index=list("abcdefg")) - result = s.map(lambda x: x.lower()) + result = s.map(lambda x: x.lower(), na_action=na_action) exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) exp = Series(exp_values, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) tm.assert_categorical_equal(result.values, exp_values) - result = s.map(lambda x: "A") + result = s.map(lambda x: "A", na_action=na_action) exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) tm.assert_series_equal(result, exp) assert result.dtype == object +@pytest.mark.parametrize( + "na_action, expected", + ( + [None, Series(["A", "B", "nan"], name="XX")], + [ + "ignore", + Series( + ["A", "B", np.nan], + name="XX", + dtype=pd.CategoricalDtype(list("DCBA"), True), + ), + ], + ), +) +def test_map_categorical_na_action(na_action, expected): + dtype = pd.CategoricalDtype(list("DCBA"), ordered=True) + values = pd.Categorical(list("AB") + [np.nan], dtype=dtype) + s = Series(values, name="XX") + result = s.map(str, na_action=na_action) + tm.assert_series_equal(result, expected) + + def test_map_datetimetz(): values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( "Asia/Tokyo" diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 55d39cf84eb30..057005b30ae20 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ 
b/pandas/tests/arrays/categorical/test_analytics.py @@ -300,16 +300,16 @@ def test_memory_usage(self): def test_map(self): c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) - result = c.map(lambda x: x.lower()) + result = c.map(lambda x: x.lower(), na_action=None) exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) tm.assert_categorical_equal(result, exp) c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) - result = c.map(lambda x: x.lower()) + result = c.map(lambda x: x.lower(), na_action=None) exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) tm.assert_categorical_equal(result, exp) - result = c.map(lambda x: 1) + result = c.map(lambda x: 1, na_action=None) # GH 12766: Return an index not an array tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) diff --git a/pandas/tests/arrays/categorical/test_map.py b/pandas/tests/arrays/categorical/test_map.py new file mode 100644 index 0000000000000..3d41b7cc7094d --- /dev/null +++ b/pandas/tests/arrays/categorical/test_map.py @@ -0,0 +1,154 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + Index, + Series, +) +import pandas._testing as tm + + +@pytest.fixture(params=[None, "ignore"]) +def na_action(request): + return request.param + + +@pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], +) +def test_map_str(data, categories, ordered, na_action): + # GH 31202 - override base class since we want to maintain categorical/ordered + cat = Categorical(data, categories=categories, ordered=ordered) + result = cat.map(str, na_action=na_action) + expected = Categorical( + map(str, data), categories=map(str, categories), ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + +def test_map(na_action): + cat = Categorical(list("ABABC"), 
categories=list("CBA"), ordered=True) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_categorical_equal(result, exp) + + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + result = cat.map(lambda x: x.lower(), na_action=na_action) + exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) + tm.assert_categorical_equal(result, exp) + + # GH 12766: Return an index not an array + result = cat.map(lambda x: 1, na_action=na_action) + exp = Index(np.array([1] * 5, dtype=np.int64)) + tm.assert_index_equal(result, exp) + + # change categories dtype + cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = cat.map(f, na_action=na_action) + exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) + tm.assert_categorical_equal(result, exp) + + mapper = Series([10, 20, 30], index=["A", "B", "C"]) + result = cat.map(mapper, na_action=na_action) + tm.assert_categorical_equal(result, exp) + + result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) + tm.assert_categorical_equal(result, exp) + + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False] * 3), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_none(data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action=None) + if isinstance(expected, Categorical): + tm.assert_categorical_equal(result, expected) + else: + 
tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + Categorical([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_ignore(data, f, expected): # GH 24241 + values = Categorical(data) + result = values.map(f, na_action="ignore") + if data[1] == 1: + tm.assert_categorical_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + +def test_map_with_dict_or_series(na_action): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cat = Categorical(orig_values) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cat.map(mapper, na_action=na_action) + + # Order of categories in result can be different + expected = Categorical(new_values, categories=[3.0, 2, "one"]) + tm.assert_categorical_equal(result, expected) + + mapper = dict(zip(orig_values[:-1], new_values[:-1])) + result = cat.map(mapper, na_action=na_action) + # Order of categories in result can be different + tm.assert_categorical_equal(result, expected) + + +def test_map_na_action_no_default_deprecated(): + # GH51645 + cat = Categorical(["a", "b", "c"]) + msg = ( + "The default value of 'ignore' for the `na_action` parameter in " + "pandas.Categorical.map is deprecated and will be " + "changed to 'None' in a future version. 
Please set na_action to the " + "desired value to avoid seeing this warning" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + cat.map(lambda x: x) diff --git a/pandas/tests/arrays/categorical/test_subclass.py b/pandas/tests/arrays/categorical/test_subclass.py index b80d0ff41aba6..48325395faad8 100644 --- a/pandas/tests/arrays/categorical/test_subclass.py +++ b/pandas/tests/arrays/categorical/test_subclass.py @@ -16,7 +16,7 @@ def test_from_codes(self): def test_map(self): sc = tm.SubclassedCategorical(["a", "b", "c"]) - res = sc.map(lambda x: x.upper()) + res = sc.map(lambda x: x.upper(), na_action=None) assert isinstance(res, tm.SubclassedCategorical) exp = Categorical(["A", "B", "C"]) tm.assert_categorical_equal(res, exp) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 34a23315fd9fa..f331449489bcc 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -186,12 +186,8 @@ def test_combine_add(self, data_repeated): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data, na_action): - if na_action is not None: - with pytest.raises(NotImplementedError, match=""): - data.map(lambda x: x, na_action=na_action) - else: - result = data.map(lambda x: x, na_action=na_action) - self.assert_extension_array_equal(result, data) + result = data.map(lambda x: x, na_action=na_action) + self.assert_extension_array_equal(result, data) class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index 261ee8daf5dec..baf836594dfb5 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ b/pandas/tests/indexes/categorical/test_map.py @@ -10,106 +10,135 @@ import pandas._testing as tm -class TestMap: - @pytest.mark.parametrize( - "data, categories", - [ - (list("abcbca"), list("cab")), - (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 
3)), - ], - ids=["string", "interval"], +@pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], +) +def test_map_str(data, categories, ordered): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered + ) + tm.assert_index_equal(result, expected) + + +def test_map(): + ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") ) - def test_map_str(self, data, categories, ordered): - # GH 31202 - override base class since we want to maintain categorical/ordered - index = CategoricalIndex(data, categories=categories, ordered=ordered) - result = index.map(str) - expected = CategoricalIndex( - map(str, data), categories=map(str, categories), ordered=ordered - ) - tm.assert_index_equal(result, expected) - - def test_map(self): - ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) - result = ci.map(lambda x: x.lower()) - exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) - tm.assert_index_equal(result, exp) - - ci = CategoricalIndex( - list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" - ) 
- result = ci.map(lambda x: x.lower()) - exp = CategoricalIndex( - list("ababc"), categories=list("bac"), ordered=False, name="XXX" - ) - tm.assert_index_equal(result, exp) - - # GH 12766: Return an index not an array - tm.assert_index_equal( - ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") - ) - - # change categories dtype - ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) - - def f(x): - return {"A": 10, "B": 20, "C": 30}.get(x) - - result = ci.map(f) - exp = CategoricalIndex( - [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False - ) - tm.assert_index_equal(result, exp) - - result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) - tm.assert_index_equal(result, exp) - - result = ci.map({"A": 10, "B": 20, "C": 30}) - tm.assert_index_equal(result, exp) - - def test_map_with_categorical_series(self): - # GH 12756 - a = Index([1, 2, 3, 4]) - b = Series(["even", "odd", "even", "odd"], dtype="category") - c = Series(["even", "odd", "even", "odd"]) - - exp = CategoricalIndex(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(b), exp) - exp = Index(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(c), exp) - - @pytest.mark.parametrize( - ("data", "f"), + + # change categories dtype + ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) + tm.assert_index_equal(result, exp) + + result = ci.map(Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + +def test_map_with_categorical_series(): + # GH 12756 + a = Index([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", 
np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), ( - ([1, 1, np.nan], pd.isna), - ([1, 2, np.nan], pd.isna), - ([1, 1, np.nan], {1: False}), - ([1, 2, np.nan], {1: False, 2: False}), - ([1, 1, np.nan], Series([False, False])), - ([1, 2, np.nan], Series([False, False, False])), + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), ), - ) - def test_map_with_nan(self, data, f): # GH 24241 - values = pd.Categorical(data) - result = values.map(f) - if data[1] == 1: - expected = pd.Categorical([False, False, np.nan]) - tm.assert_categorical_equal(result, expected) - else: - expected = Index([False, False, np.nan]) - tm.assert_index_equal(result, expected) - - def test_map_with_dict_or_series(self): - orig_values = ["a", "B", 1, "a"] - new_values = ["one", 2, 3.0, "one"] - cur_index = CategoricalIndex(orig_values, name="XXX") - expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) - - mapper = Series(new_values[:-1], index=orig_values[:-1]) - result = cur_index.map(mapper) - # Order of categories in result can be different - tm.assert_index_equal(result, expected) - - mapper = dict(zip(orig_values[:-1], new_values[:-1])) - result = cur_index.map(mapper) - # Order of categories in result can be different - tm.assert_index_equal(result, expected) + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_ignore(data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, na_action="ignore") + 
tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + ("data", "f", "expected"), + ( + ([1, 1, np.nan], pd.isna, Index([False, False, True])), + ([1, 2, np.nan], pd.isna, Index([False, False, True])), + ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), + ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), + ( + [1, 1, np.nan], + Series([False, False]), + CategoricalIndex([False, False, np.nan]), + ), + ( + [1, 2, np.nan], + Series([False, False, False]), + Index([False, False, np.nan]), + ), + ), +) +def test_map_with_nan_none(data, f, expected): # GH 24241 + values = CategoricalIndex(data) + result = values.map(f, na_action=None) + tm.assert_index_equal(result, expected) + + +def test_map_with_dict_or_series(): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cur_index = CategoricalIndex(orig_values, name="XXX") + expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"]) + + mapper = Series(new_values[:-1], index=orig_values[:-1]) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected) + + mapper = dict(zip(orig_values[:-1], new_values[:-1])) + result = cur_index.map(mapper) + # Order of categories in result can be different + tm.assert_index_equal(result, expected)