From b551b28a9334b4bdaf80056669b88be675982542 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 25 Oct 2017 11:37:33 -0500 Subject: [PATCH 1/9] PERF/API: Treat series as array-like for rename_categories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HEAD: ``` [ 50.00%] ··· Running categoricals.Categoricals3.time_rank_string_cat 6.63ms [ 50.00%] ····· [100.00%] ··· Running categoricals.Categoricals3.time_rank_string_cat_ordered 4.85ms ``` Closes https://github.com/pandas-dev/pandas/issues/17981 --- pandas/core/categorical.py | 3 ++- pandas/tests/test_categorical.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e8537fb576536..8010385cdeff7 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -900,7 +900,8 @@ def rename_categories(self, new_categories, inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - if is_dict_like(new_categories): + if (is_dict_like(new_categories) and + not isinstance(new_categories, ABCSeries)): cat.categories = [new_categories.get(item, item) for item in cat.categories] else: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index d88e92a39a6c5..df33259430889 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1203,6 +1203,13 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + result = pd.Categorical(['a', 'b']).rename_categories( + pd.Series([0, 1])) + expected = pd.Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + def test_rename_categories_dict(self): # GH 17336 cat = pd.Categorical(['a', 'b', 'c', 'd']) From 19af62a4f8035832dde39a6530338a9c1a92ae43 Mon Sep 17 00:00:00 2001 From: Tom Augspurger 
Date: Wed, 25 Oct 2017 11:48:26 -0500 Subject: [PATCH 2/9] Redo docstring --- pandas/core/categorical.py | 44 ++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 8010385cdeff7..d23f5a06b7e7f 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -866,11 +866,6 @@ def set_categories(self, new_categories, ordered=None, rename=False, def rename_categories(self, new_categories, inplace=False): """ Renames categories. - The new categories can be either a list-like dict-like object. - If it is list-like, all items must be unique and the number of items - in the new categories must be the same as the number of items in the - old categories. - Raises ------ ValueError @@ -879,8 +874,22 @@ def rename_categories(self, new_categories, inplace=False): Parameters ---------- - new_categories : Index-like or dict-like (>=0.21.0) - The renamed categories. + new_categories : array-like or dict-like + The categories end up with + + .. versionchanged:: 0.21.0 + + new_categories may now also be dict-like, in which case it + specifies a mapping from old-categories to new. + + If it is list-like, all items must be unique and the number of + items in the new categories must match the existing number of + categories. + + If dict-like, categories not contained in the mapping are passed + through. Note that ``Series`` are considered list-like in this + context. + inplace : boolean (default: False) Whether or not to rename the categories inplace or return a copy of this categorical with renamed categories. 
@@ -896,6 +905,27 @@ def rename_categories(self, new_categories, inplace=False): remove_categories remove_unused_categories set_categories + + Examples + -------- + >>> c = Categorical(['a', 'a', 'b']) + >>> c.rename_categories([0, 1]) + [0, 0, 1] + Categories (2, int64): [0, 1] + + For dict-like ``new_categories``, extra keys are ignored and + categories not in the dictionary are passed through + + >>> c.rename_categories({'a': 'A', 'c': 'C'}) + [A, A, b] + Categories (2, object): [A, b] + + Series are considered array-like here, so the *values* are used + instead of the *index* + + >>> c.rename_categories(pd.Series([0, 1], index=['a', 'b'])) + [0, 0, 1] + Categories (2, int64): [0, 1] """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() From f659e783753925e3cfe59db36adf9d3d46aabc42 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 25 Oct 2017 14:18:22 -0500 Subject: [PATCH 3/9] Use list-like --- pandas/core/categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index d23f5a06b7e7f..b03b77c4fe9f3 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -874,7 +874,7 @@ def rename_categories(self, new_categories, inplace=False): Parameters ---------- - new_categories : array-like or dict-like + new_categories : list-like or dict-like The categories end up with .. 
versionchanged:: 0.21.0 @@ -920,7 +920,7 @@ def rename_categories(self, new_categories, inplace=False): [A, A, b] Categories (2, object): [A, b] - Series are considered array-like here, so the *values* are used + Series are considered list-like here, so the *values* are used instead of the *index* >>> c.rename_categories(pd.Series([0, 1], index=['a', 'b'])) From 6460bc513ae3ccf30fe1be3cb473b19940422de4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 05:47:56 -0500 Subject: [PATCH 4/9] Warn --- doc/source/whatsnew/v0.21.0.txt | 31 ++++++++++++++++++++++++++++++- pandas/core/categorical.py | 20 ++++++++++++++++---- pandas/tests/test_categorical.py | 9 +++++++-- 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 18f8858748df5..2df2f0e22e1e7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -239,6 +239,36 @@ Now, to find prices per store/product, we can simply do: .pipe(lambda grp: grp.Revenue.sum()/grp.Quantity.sum()) .unstack().round(2)) + +.. _whatsnew_0210.enhancements.rename_categories: + +``Categorical.rename_categories`` accepts a dict-like +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Categorical.rename_categories` now accepts a dict-like argument for +``new_categories``. The previous categories are lookup up in the dictionary's +keys and replaced if found. The behavior of missing and extra keys is the same +as in :meth:`DataFrame.rename`. + +.. ipython:: python + + c = pd.Categorical(['a', 'a', 'b']) + c.rename_categories({"a": "eh", "b": "bee"}) + +.. warning:: + + To assist with upgrading pandas, ``rename_categories`` treats ``Series`` as + list-like. Typically, they are considered to be dict-like, and in a future + version of pandas ``rename_categories`` will change to treat them as + dict-like. + + ..
ipython:: python + :okwarning: + + c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) + + Follow the warning message's recommendations. + See the :ref:`documentation <groupby.pipe>` for more. .. _whatsnew_0210.enhancements.other: @@ -267,7 +297,6 @@ Other Enhancements - :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and is lazy in all cases. (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented as a convenience for :func:`Styler.applymap`. (:issue:`17474`) - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) -- :func:`Categorical.rename_categories` now accepts a dict-like argument as ``new_categories`` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b03b77c4fe9f3..6c652c98f8d78 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -887,8 +887,12 @@ def rename_categories(self, new_categories, inplace=False): categories. If dict-like, categories not contained in the mapping are passed - through. Note that ``Series`` are considered list-like in this - context. + through. + + .. warning:: + + Currently, Series are considered list-like. In a future version + of pandas they'll be considered dict-like.
inplace : boolean (default: False) Whether or not to rename the categories inplace or return a copy of @@ -930,8 +934,16 @@ def rename_categories(self, new_categories, inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - if (is_dict_like(new_categories) and - not isinstance(new_categories, ABCSeries)): + is_series = isinstance(new_categories, ABCSeries) + + if is_series: + msg = ("Treating Series 'new_categories' as a list-like and using " + "the values. In a future version, 'rename_categories' will " + "treat Series like a dictionary.\n" + "For dict-like, use 'new_categories.to_dict()'\n" + "For list-like, use 'new_categories.values'.") + warn(msg, FutureWarning, stacklevel=2) + if is_dict_like(new_categories) and not is_series: cat.categories = [new_categories.get(item, item) for item in cat.categories] else: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index df33259430889..272ba25bf8f8a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1205,8 +1205,13 @@ def test_rename_categories(self): def test_rename_categories_series(self): # https://github.com/pandas-dev/pandas/issues/17981 - result = pd.Categorical(['a', 'b']).rename_categories( - pd.Series([0, 1])) + c = pd.Categorical(['a', 'b']) + xpr = "Treating Series 'new_categories' as a list-like " + with tm.assert_produces_warning(FutureWarning) as rec: + result = c.rename_categories(pd.Series([0, 1])) + + assert len(rec) == 1 + assert xpr in str(rec[0].message) expected = pd.Categorical([0, 1]) tm.assert_categorical_equal(result, expected) From 506f9ba74330242ca0575542f0cdc03faa61dc46 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 05:50:53 -0500 Subject: [PATCH 5/9] Fix doc indent --- doc/source/whatsnew/v0.21.0.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 
2df2f0e22e1e7..f9306526bb7c9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -262,12 +262,12 @@ as in :meth:`DataFrame.rename`. version of pandas ``rename_categories`` will change to treat them as dict-like. - .. ipython:: python - :okwarning: + .. ipython:: python + :okwarning: - c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) + c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) - Follow the warning message's recommendations. + Follow the warning message's recommendations. See the :ref:`documentation <groupby.pipe>` for more. From 4f6a1dd6ee50c8e62ac41bff685a522a01a9a4ff Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 06:29:02 -0500 Subject: [PATCH 6/9] Doc cleanup --- pandas/core/categorical.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6c652c98f8d78..de987f8801c09 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -875,19 +875,14 @@ def rename_categories(self, new_categories, inplace=False): Parameters ---------- new_categories : list-like or dict-like - The categories end up with - .. versionchanged:: 0.21.0 + * list-like: all items must be unique and the number of items in + the new categories must match the existing number of categories. - new_categories may now also be dict-like, in which case it - specifies a mapping from old-categories to new. - - If it is list-like, all items must be unique and the number of - items in the new categories must match the existing number of - categories. - - If dict-like, categories not contained in the mapping are passed - through. + * ..versionadded:: 0.21.0 dict-like: specifies a mapping from + old categories to new. Categories not contained in the mapping + are passed through and extra categories in the mapping are + ignored. ..
warning:: @@ -923,27 +918,20 @@ def rename_categories(self, new_categories, inplace=False): >>> c.rename_categories({'a': 'A', 'c': 'C'}) [A, A, b] Categories (2, object): [A, b] - - Series are considered list-like here, so the *values* are used - instead of the *index* - - >>> c.rename_categories(pd.Series([0, 1], index=['a', 'b'])) - [0, 0, 1] - Categories (2, int64): [0, 1] """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - is_series = isinstance(new_categories, ABCSeries) - - if is_series: + if isinstance(new_categories, ABCSeries): msg = ("Treating Series 'new_categories' as a list-like and using " "the values. In a future version, 'rename_categories' will " "treat Series like a dictionary.\n" "For dict-like, use 'new_categories.to_dict()'\n" "For list-like, use 'new_categories.values'.") warn(msg, FutureWarning, stacklevel=2) - if is_dict_like(new_categories) and not is_series: + new_categories = list(new_categories) + + if is_dict_like(new_categories): cat.categories = [new_categories.get(item, item) for item in cat.categories] else: From cbcc723d953a413423c6fe8c1c59d901bcca5308 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 06:43:08 -0500 Subject: [PATCH 7/9] More doc cleanup --- pandas/core/categorical.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index de987f8801c09..c879ab1ad1cca 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -879,10 +879,10 @@ def rename_categories(self, new_categories, inplace=False): * list-like: all items must be unique and the number of items in the new categories must match the existing number of categories. - * ..versionadded:: 0.21.0 dict-like: specifies a mapping from + * dict-like: specifies a mapping from old categories to new. Categories not contained in the mapping are passed through and extra categories in the mapping are - ignored. + ignored. 
*New in verison 0.21.0*. .. warning:: @@ -895,7 +895,9 @@ def rename_categories(self, new_categories, inplace=False): Returns ------- - cat : Categorical with renamed categories added or None if inplace. + cat : Categorical or None + With ``inplace=False``, the new categorical is returned. + With ``inplace=True``, there is no return value. See also -------- From aec380db050f8f41316669ca1707ab50d2a49072 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 12:55:53 -0500 Subject: [PATCH 8/9] Fix API reference --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f9306526bb7c9..61f5389883f3b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -245,7 +245,7 @@ Now, to find prices per store/product, we can simply do: ``Categorical.rename_categories`` accepts a dict-like ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:meth:`Categorical.rename_categories` now accepts a dict-like argument for +:meth:`~Series.cat.rename_categories` now accepts a dict-like argument for ``new_categories``. The previous categories are lookup up in the dictionary's keys and replaced if found. The behavior of missing and extra keys is the same as in :meth:`DataFrame.rename`. 
From 0e086cac8f66f9ea4efcf27f591d3769241db72f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 16:44:11 -0500 Subject: [PATCH 9/9] Typos --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/categorical.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 61f5389883f3b..11106554483e0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -246,7 +246,7 @@ Now, to find prices per store/product, we can simply do: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`~Series.cat.rename_categories` now accepts a dict-like argument for -``new_categories``. The previous categories are lookup up in the dictionary's +``new_categories``. The previous categories are looked up in the dictionary's keys and replaced if found. The behavior of missing and extra keys is the same as in :meth:`DataFrame.rename`. diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index c879ab1ad1cca..e709c771b7d18 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -882,7 +882,7 @@ def rename_categories(self, new_categories, inplace=False): * dict-like: specifies a mapping from old categories to new. Categories not contained in the mapping are passed through and extra categories in the mapping are - ignored. *New in verison 0.21.0*. + ignored. *New in version 0.21.0*. .. warning::