diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e7d414f9de544..afbf4baf0d002 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1080,20 +1080,74 @@ def remove_unused_categories(self, inplace=False): return cat def map(self, mapper): - """Apply mapper function to its categories (not codes). + """ + Map categories using input correspondence (dict, Series, or function). + + Maps the categories to new categories. If the mapping correspondence is + one-to-one the result is a :class:`~pandas.Categorical` which has the + same order property as the original, otherwise a :class:`~pandas.Index` + is returned. + + If a `dict` or :class:`~pandas.Series` is used any unmapped category is + mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` + will be returned. Parameters ---------- - mapper : callable - Function to be applied. When all categories are mapped - to different categories, the result will be Categorical which has - the same order property as the original. Otherwise, the result will - be np.ndarray. + mapper : function, dict, or Series + Mapping correspondence. Returns ------- - applied : Categorical or Index. + pandas.Categorical or pandas.Index + Mapped categorical. + + See Also + -------- + CategoricalIndex.map : Apply a mapping correspondence on a + :class:`~pandas.CategoricalIndex`. + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. 
+ + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + [a, b, c] + Categories (3, object): [a, b, c] + >>> cat.map(lambda x: x.upper()) + [A, B, C] + Categories (3, object): [A, B, C] + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + [first, second, third] + Categories (3, object): [first, second, third] + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) + >>> cat + [a, b, c] + Categories (3, object): [a < b < c] + >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + [3, 2, 1] + Categories (3, int64): [3 < 2 < 1] + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> cat.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') """ new_categories = self.categories.map(mapper) try: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 446b1b02706e3..95bfc8bfcb5c5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3352,14 +3352,16 @@ def groupby(self, values): return result def map(self, mapper, na_action=None): - """Map values of Series using input correspondence + """ + Map values using input correspondence (a dict, Series, or function). Parameters ---------- mapper : function, dict, or Series + Mapping correspondence. na_action : {None, 'ignore'} If 'ignore', propagate NA values, without passing them to the - mapping function + mapping correspondence. Returns ------- @@ -3367,7 +3369,6 @@ def map(self, mapper, na_action=None): The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. 
- """ from .multi import MultiIndex diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7b902b92d44a4..71caa098c7a28 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -660,20 +660,71 @@ def is_dtype_equal(self, other): take_nd = take def map(self, mapper): - """Apply mapper function to its categories (not codes). + """ + Map values using input correspondence (a dict, Series, or function). + + Maps the values (their categories, not the codes) of the index to new + categories. If the mapping correspondence is one-to-one the result is a + :class:`~pandas.CategoricalIndex` which has the same order property as + the original, otherwise an :class:`~pandas.Index` is returned. + + If a `dict` or :class:`~pandas.Series` is used any unmapped category is + mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` + will be returned. Parameters ---------- - mapper : callable - Function to be applied. When all categories are mapped - to different categories, the result will be a CategoricalIndex - which has the same order property as the original. Otherwise, - the result will be a Index. + mapper : function, dict, or Series + Mapping correspondence. Returns ------- - applied : CategoricalIndex or Index + pandas.CategoricalIndex or pandas.Index + Mapped index. + + See Also + -------- + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. 
+ Examples + -------- + >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=False, dtype='category') + >>> idx.map(lambda x: x.upper()) + CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], + ordered=False, dtype='category') + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'}) + CategoricalIndex(['first', 'second', 'third'], categories=['first', + 'second', 'third'], ordered=False, dtype='category') + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True) + >>> idx + CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], + ordered=True, dtype='category') + >>> idx.map({'a': 3, 'b': 2, 'c': 1}) + CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True, + dtype='category') + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> idx.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') """ return self._shallow_copy_with_infer(self.values.map(mapper)) diff --git a/pandas/core/series.py b/pandas/core/series.py index e4801242073a2..f0ba369e1731a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2831,25 +2831,26 @@ def unstack(self, level=-1, fill_value=None): def map(self, arg, na_action=None): """ - Map values of Series using input correspondence (which can be - a dict, Series, or function) + Map values of Series using input correspondence (a dict, Series, or + function). Parameters ---------- arg : function, dict, or Series + Mapping correspondence. 
na_action : {None, 'ignore'} If 'ignore', propagate NA values, without passing them to the - mapping function + mapping correspondence. Returns ------- y : Series - same index as caller + Same index as caller. Examples -------- - Map inputs to outputs (both of type `Series`) + Map inputs to outputs (both of type `Series`): >>> x = pd.Series([1,2,3], index=['one', 'two', 'three']) >>> x @@ -2900,9 +2901,9 @@ def map(self, arg, na_action=None): See Also -------- - Series.apply: For applying more complex functions on a Series - DataFrame.apply: Apply a function row-/column-wise - DataFrame.applymap: Apply a function elementwise on a whole DataFrame + Series.apply : For applying more complex functions on a Series. + DataFrame.apply : Apply a function row-/column-wise. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. Notes -----