From 07ceda8e78eb9279e9fa0ebab8ed1d4cfcf972eb Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Mon, 25 Jul 2022 12:00:30 +0200 Subject: [PATCH 1/2] DOC: Clarify sorting and order of categoricals and fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the addition in the text it's unclear what s.sort_values(inplace=True) should demonstrate. Show some meaningful output now. Also, when using astype to contrast ordered/unordered behavior, I think it's clearer to use the existing, unordered Series than to create a new one. --- doc/source/user_guide/categorical.rst | 14 +++++++++----- pandas/core/arrays/categorical.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 0105cf99193dd..dd5854ef66a6f 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -439,15 +439,19 @@ Sorting and order .. _categorical.sort: If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a -meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a ``TypeError``. +meaning and certain operations are possible. If the categorical is unordered, the data can still be sorted, +but ``.min()/.max()`` will raise a ``TypeError``. .. ipython:: python s = pd.Series(pd.Categorical(["a", "b", "c", "a"], ordered=False)) - s.sort_values(inplace=True) - s = pd.Series(["a", "b", "c", "a"]).astype(CategoricalDtype(ordered=True)) - s.sort_values(inplace=True) - s + s.sort_values(ascending=False) + try: + s.min() + except TypeError as e: + print(f"TypeError: {e}") + s = s.astype(CategoricalDtype(ordered=True)) + s.sort_values() s.min(), s.max() You can set categorical data to be ordered by using ``as_ordered()`` or unordered by using ``as_unordered()``. 
These will by diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2c3b7c2f2589d..8503ce30bd325 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -256,7 +256,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi """ Represent a categorical variable in classic R / S-plus fashion. - `Categoricals` can only take on only a limited, and usually fixed, number + `Categoricals` can only take on a limited, and usually fixed, number of possible values (`categories`). In contrast to statistical categorical variables, a `Categorical` might have an order, but numerical operations (additions, divisions, ...) are not possible. From f77645f68aa79e8dc3036fd17211854e5be769d8 Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Fri, 5 Aug 2022 15:09:10 +0200 Subject: [PATCH 2/2] DOC: Clarify sorting and order of categoricals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the addition in the text it's unclear what s.sort_values(inplace=True) should demonstrate. Show some meaningful output now. Also, when using astype to contrast ordered/unordered behavior, I think it's clearer to use the existing, unordered Series than to create a new one. Use :okexcept: directive as requested by mroeschke. --- doc/source/user_guide/categorical.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index dd5854ef66a6f..be26427d11d81 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -443,13 +443,11 @@ meaning and certain operations are possible. If the categorical is unordered, th but ``.min()/.max()`` will raise a ``TypeError``. .. 
ipython:: python + :okexcept: s = pd.Series(pd.Categorical(["a", "b", "c", "a"], ordered=False)) s.sort_values(ascending=False) - try: - s.min() - except TypeError as e: - print(f"TypeError: {e}") + s.min() s = s.astype(CategoricalDtype(ordered=True)) s.sort_values() s.min(), s.max()