From b265349ec97eae68a93e9192db87b5c6da9e4c5a Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@gmail.com>
Date: Thu, 28 Mar 2019 11:55:40 +0100
Subject: [PATCH 01/10] BUG: Fix groupby sorting on ordered Categoricals
 (GH25871) (#1)

* BUG: Fix groupby on ordered Categoricals (GH25871)

As documented in #25871, groupby() on an ordered Categorical messes up category order when 'observed=True' is specified.
Specifically, group labels will be ordered by first occurrence (as for an unordered Categorical), but grouped aggregation results will retain the Categorical's order.
The fix is a modified subset of #25173, which fixes a related case, but has not been merged yet.

* BUG: Fix groupby on ordered Categoricals (GH25871)

* new test

* Fix groupby on ordered Categoricals (GH25871)

Testing all combinations of:
- ordered vs. unordered grouping column
- 'observed' True vs. False
- 'sort' True vs. False
In all cases, result group ordering must be correct.
The test is built such that the result index labels are equal to aggregation results if all goes well (except for the one unobserved category)
---
 doc/source/whatsnew/v0.25.0.rst          |  2 +-
 pandas/core/groupby/grouper.py           |  2 ++
 pandas/tests/groupby/test_categorical.py | 22 ++++++++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 561562f367db2..8d246b16ea4c4 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -347,7 +347,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`)
 - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
 - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`)
-
+- Ensured that result group order is correct when grouping on an ordered Categorical and specifying ``observed=True`` (:issue:`25871`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index e470a32b85cd6..335d40dd16949 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -302,6 +302,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                 if observed:
                     codes = algorithms.unique1d(self.grouper.codes)
                     codes = codes[codes != -1]
+                    if sort or self.grouper.ordered:
+                        codes = np.sort(codes)
                 else:
                     codes = np.arange(len(categories))
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e118135ccc75d..8dfb27ee34232 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -453,6 +453,28 @@ def test_dataframe_categorical_with_nan(observed):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("ordered",  [True, False])
+@pytest.mark.parametrize("observed", [True, False])
+@pytest.mark.parametrize("sort",     [True, False])
+def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
+    # GH 25871: Fix groupby sorting on ordered Categoricals 
+    # Build a dataframe with a Categorical having one unobserved category ('AWOL'), and a Series with identical values
+    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'], categories=['a', 'b', 'AWOL', 'd'], ordered=ordered)
+    val = pd.Series     (['d', 'a', 'b', 'a', 'd', 'b'])
+    df  = pd.DataFrame({'cat': cat, 'val': val})
+
+    # aggregate on the Categorical
+    result = df.groupby('cat', observed=observed, sort=sort)['val'].agg('first')
+    
+    # If ordering is correct, we expect index labels equal to aggregation results,
+    # except for 'observed=False', when index contains 'AWOL' and aggregation None
+    label = pd.Series(result.index.array, dtype='object')
+    aggr  = pd.Series(result.array)
+    if not observed:
+        aggr[aggr.isna()] = 'AWOL'
+    tm.assert_equal(label, aggr)    
+
+    
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range('2014-01-01', periods=4)

From 50e7d64f8a7be0848d32001611a8797bb8ef0453 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@gmail.com>
Date: Thu, 28 Mar 2019 12:15:28 +0100
Subject: [PATCH 02/10] BUG: GH25871 -- fix PEP 8 issues on test source

---
 pandas/tests/groupby/test_categorical.py | 30 ++++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 8dfb27ee34232..bddeee5c10014 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -453,28 +453,32 @@ def test_dataframe_categorical_with_nan(observed):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("ordered",  [True, False])
+@pytest.mark.parametrize("ordered", [True, False])
 @pytest.mark.parametrize("observed", [True, False])
-@pytest.mark.parametrize("sort",     [True, False])
+@pytest.mark.parametrize("sort", [True, False])
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
-    # GH 25871: Fix groupby sorting on ordered Categoricals 
-    # Build a dataframe with a Categorical having one unobserved category ('AWOL'), and a Series with identical values
-    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'], categories=['a', 'b', 'AWOL', 'd'], ordered=ordered)
-    val = pd.Series     (['d', 'a', 'b', 'a', 'd', 'b'])
-    df  = pd.DataFrame({'cat': cat, 'val': val})
+    # GH 25871: Fix groupby sorting on ordered Categoricals
+    # Build a dataframe with a Categorical having one unobserved category ('AWOL'), 
+    # and a Series with identical values
+    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'], 
+                         categories=['a', 'b', 'AWOL', 'd'], 
+                         ordered=ordered)
+    val = pd.Series(['d', 'a', 'b', 'a', 'd', 'b'])
+    df = pd.DataFrame({'cat': cat, 'val': val})
 
     # aggregate on the Categorical
-    result = df.groupby('cat', observed=observed, sort=sort)['val'].agg('first')
+    result = (df.groupby('cat', observed=observed, sort=sort)['val']
+                .aggregate('first'))
     
-    # If ordering is correct, we expect index labels equal to aggregation results,
-    # except for 'observed=False', when index contains 'AWOL' and aggregation None
+    # If ordering works, we expect index labels equal to aggregation results,
+    # except for 'observed=False': index contains 'AWOL' and aggregation None
     label = pd.Series(result.index.array, dtype='object')
-    aggr  = pd.Series(result.array)
+    aggr = pd.Series(result.array)
     if not observed:
         aggr[aggr.isna()] = 'AWOL'
-    tm.assert_equal(label, aggr)    
+    tm.assert_equal(label, aggr)
+
 
-    
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range('2014-01-01', periods=4)

From 0aaa3474909cb39d13d98f4bd51684b84f32af49 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@gmail.com>
Date: Thu, 28 Mar 2019 12:34:42 +0100
Subject: [PATCH 03/10] BUG: GH25871 -- fix PEP 8 issues on test source

2nd shot...
---
 pandas/tests/groupby/test_categorical.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index bddeee5c10014..2e9e7033162bf 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -458,10 +458,10 @@ def test_dataframe_categorical_with_nan(observed):
 @pytest.mark.parametrize("sort", [True, False])
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25871: Fix groupby sorting on ordered Categoricals
-    # Build a dataframe with a Categorical having one unobserved category ('AWOL'), 
+    # Build a dataframe with cat having one unobserved category ('AWOL'),
     # and a Series with identical values
-    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'], 
-                         categories=['a', 'b', 'AWOL', 'd'], 
+    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'],
+                         categories=['a', 'b', 'AWOL', 'd'],
                          ordered=ordered)
     val = pd.Series(['d', 'a', 'b', 'a', 'd', 'b'])
     df = pd.DataFrame({'cat': cat, 'val': val})
@@ -469,7 +469,7 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # aggregate on the Categorical
     result = (df.groupby('cat', observed=observed, sort=sort)['val']
                 .aggregate('first'))
-    
+
     # If ordering works, we expect index labels equal to aggregation results,
     # except for 'observed=False': index contains 'AWOL' and aggregation None
     label = pd.Series(result.index.array, dtype='object')

From 91cc505534bff17abc2a8bae8032cf99b42e6a11 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@gmail.com>
Date: Thu, 28 Mar 2019 16:28:04 +0100
Subject: [PATCH 04/10] BUG: GH25871 -- adapt test_groupby_levels_and_columns

This test had an adjustment for column order when 'observed=True' is set. This hid the fact that, with that parameter set, the data columns were not actually reordered -- it was just the column group labels (analogous to index labels in #25871), leaving the data columns in place and out of sync. (This was not visible as the data consisted only of ones).

I've made the test more sensitive (unsyncing of data columns will be caught now) and removed the special case for 'observed=True'. As there are no unobserved categories in this case, the result should not be influenced by this parameter.
---
 pandas/tests/groupby/test_grouping.py | 29 +++++++++++++--------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 19b3c59d8ec8e..1006523fb239a 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -253,28 +253,27 @@ def test_groupby_levels_and_columns(self):
         tm.assert_frame_equal(by_levels, by_columns)
 
     def test_groupby_categorical_index_and_columns(self, observed):
-        # GH18432
+        # GH18432, adapted for GH25871
         columns = ['A', 'B', 'A', 'B']
         categories = ['B', 'A']
-        data = np.ones((5, 4), int)
+        data = np.array([[1, 2, 1, 2],
+                         [1, 2, 1, 2],
+                         [1, 2, 1, 2],
+                         [1, 2, 1, 2],
+                         [1, 2, 1, 2]], int)
         cat_columns = CategoricalIndex(columns,
                                        categories=categories,
                                        ordered=True)
         df = DataFrame(data=data, columns=cat_columns)
         result = df.groupby(axis=1, level=0, observed=observed).sum()
-        expected_data = 2 * np.ones((5, 2), int)
-
-        if observed:
-            # if we are not-observed we undergo a reindex
-            # so need to adjust the output as our expected sets us up
-            # to be non-observed
-            expected_columns = CategoricalIndex(['A', 'B'],
-                                                categories=categories,
-                                                ordered=True)
-        else:
-            expected_columns = CategoricalIndex(categories,
-                                                categories=categories,
-                                                ordered=True)
+        expected_data = np.array([[4, 2],
+                                  [4, 2],
+                                  [4, 2],
+                                  [4, 2],
+                                  [4, 2]], int)
+        expected_columns = CategoricalIndex(categories,
+                                            categories=categories,
+                                            ordered=True)
         expected = DataFrame(data=expected_data, columns=expected_columns)
         assert_frame_equal(result, expected)
 

From 2abf281fba8d84e045f36d316459403fc428a396 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@me.com>
Date: Fri, 29 Mar 2019 09:24:24 +0100
Subject: [PATCH 05/10] Fix whatsnew file formatting

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 8d246b16ea4c4..39343f47d3390 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -347,7 +347,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`)
 - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
 - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`)
-- Ensured that result group order is correct when grouping on an ordered Categorical and specifying ``observed=True`` (:issue:`25871`)
+- Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`)
 
 Reshaping
 ^^^^^^^^^

From b8b2011cf8fe14daac257098db0a3eb1ad94664b Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@me.com>
Date: Fri, 29 Mar 2019 09:25:24 +0100
Subject: [PATCH 06/10] Extend unit test with code sample from #25167

---
 pandas/tests/groupby/test_categorical.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 2e9e7033162bf..8d412cee32bb2 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -476,7 +476,16 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     aggr = pd.Series(result.array)
     if not observed:
         aggr[aggr.isna()] = 'AWOL'
-    tm.assert_equal(label, aggr)
+    tm.assert_series_equal(label, aggr)
+
+    # GH 25167: Groupby with observed=True doesn't sort
+    df = pd.DataFrame({'A': pd.Categorical(['b', 'a']), 'B': [1, 2]})
+    result = df.groupby('A', observed=True).sum()
+    expected = pd.DataFrame({'B': [2, 1]},
+                            index=pd.CategoricalIndex(['b', 'a'], categories=['a', 'b'],
+                                                      ordered=False, name='A', dtype='category'),
+                            dtype='int')
+    tm.assert_frame_equal(result, expected)
 
 
 def test_datetime():

From 5200544d3eb6d8c18d62e023152858131261bc10 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@me.com>
Date: Fri, 29 Mar 2019 14:24:17 +0100
Subject: [PATCH 07/10] Fix test: expected values

---
 pandas/tests/groupby/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 8d412cee32bb2..feb9bdbb545e5 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -481,7 +481,7 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25167: Groupby with observed=True doesn't sort
     df = pd.DataFrame({'A': pd.Categorical(['b', 'a']), 'B': [1, 2]})
     result = df.groupby('A', observed=True).sum()
-    expected = pd.DataFrame({'B': [2, 1]},
+    expected = pd.DataFrame({'B': [1, 2]},
                             index=pd.CategoricalIndex(['b', 'a'], categories=['a', 'b'],
                                                       ordered=False, name='A', dtype='category'),
                             dtype='int')

From 916f3eb95361dfc060e7d4ab04800d1f3dfeae14 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@me.com>
Date: Fri, 29 Mar 2019 15:11:12 +0100
Subject: [PATCH 08/10] Fix test: expected values dtype (Windows takes 'int' as
 'int32', instead of 'int64'), also PEP 8 fixes

---
 pandas/tests/groupby/test_categorical.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index feb9bdbb545e5..0a389eab568ca 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -481,10 +481,13 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25167: Groupby with observed=True doesn't sort
     df = pd.DataFrame({'A': pd.Categorical(['b', 'a']), 'B': [1, 2]})
     result = df.groupby('A', observed=True).sum()
+    expected_index = pd.CategoricalIndex(['b', 'a'],
+                                         categories=['a', 'b'],
+                                         ordered=False, name='A',
+                                         dtype='category')
     expected = pd.DataFrame({'B': [1, 2]},
-                            index=pd.CategoricalIndex(['b', 'a'], categories=['a', 'b'],
-                                                      ordered=False, name='A', dtype='category'),
-                            dtype='int')
+                            index=expected_index,
+                            dtype='int64')
     tm.assert_frame_equal(result, expected)
 
 

From df8a995eb5f080b8cf8079174d52296122923667 Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@me.com>
Date: Sat, 30 Mar 2019 19:52:47 +0100
Subject: [PATCH 09/10] Added #25167 to whatsnew file as resolved

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 39343f47d3390..4edeadd3d70f2 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -347,7 +347,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`)
 - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
 - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`)
-- Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`)
+- Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`)
 
 Reshaping
 ^^^^^^^^^

From 21ff12cef58e3c718106f9ccff92865a4289962f Mon Sep 17 00:00:00 2001
From: kpflugshaupt <kpflugshaupt@gmail.com>
Date: Wed, 3 Apr 2019 10:04:28 +0200
Subject: [PATCH 10/10] Improve test reporting

Added a generic assert with a custom message to make problem more obvious.
---
 pandas/tests/groupby/test_categorical.py | 36 ++++++++++--------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 0a389eab568ca..4bbe44e0a4694 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -458,37 +458,31 @@ def test_dataframe_categorical_with_nan(observed):
 @pytest.mark.parametrize("sort", [True, False])
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25871: Fix groupby sorting on ordered Categoricals
-    # Build a dataframe with cat having one unobserved category ('AWOL'),
+    # GH 25167: Groupby with observed=True doesn't sort
+
+    # Build a dataframe with cat having one unobserved category ('missing'),
     # and a Series with identical values
-    cat = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'],
-                         categories=['a', 'b', 'AWOL', 'd'],
-                         ordered=ordered)
+    label = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'],
+                           categories=['a', 'b', 'missing', 'd'],
+                           ordered=ordered)
     val = pd.Series(['d', 'a', 'b', 'a', 'd', 'b'])
-    df = pd.DataFrame({'cat': cat, 'val': val})
+    df = pd.DataFrame({'label': label, 'val': val})
 
     # aggregate on the Categorical
-    result = (df.groupby('cat', observed=observed, sort=sort)['val']
+    result = (df.groupby('label', observed=observed, sort=sort)['val']
                 .aggregate('first'))
 
     # If ordering works, we expect index labels equal to aggregation results,
-    # except for 'observed=False': index contains 'AWOL' and aggregation None
+    # except for 'observed=False': label 'missing' has aggregation None
     label = pd.Series(result.index.array, dtype='object')
     aggr = pd.Series(result.array)
     if not observed:
-        aggr[aggr.isna()] = 'AWOL'
-    tm.assert_series_equal(label, aggr)
-
-    # GH 25167: Groupby with observed=True doesn't sort
-    df = pd.DataFrame({'A': pd.Categorical(['b', 'a']), 'B': [1, 2]})
-    result = df.groupby('A', observed=True).sum()
-    expected_index = pd.CategoricalIndex(['b', 'a'],
-                                         categories=['a', 'b'],
-                                         ordered=False, name='A',
-                                         dtype='category')
-    expected = pd.DataFrame({'B': [1, 2]},
-                            index=expected_index,
-                            dtype='int64')
-    tm.assert_frame_equal(result, expected)
+        aggr[aggr.isna()] = 'missing'
+    if not all(label == aggr):
+        msg = ('Labels and aggregation results not consistently sorted\n' +
+               'for (ordered={}, observed={}, sort={})\n' +
+               'Result:\n{}').format(ordered, observed, sort, result)
+        assert False, msg
 
 
 def test_datetime():