From a5d6d1a935bc17a29a42f16abe0827aaa60381e2 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Sun, 19 May 2019 21:38:05 +0100
Subject: [PATCH 01/14] Fix 'observed' kwarg not doing anything on
 SeriesGroupBy

---
 pandas/core/groupby/generic.py       | 90 +++++-----------------------
 pandas/core/groupby/groupby.py       | 66 +++++++++++++++++++-
 pandas/tests/groupby/test_groupby.py | 59 +++++++++++++++++-
 3 files changed, 138 insertions(+), 77 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 2f665975f96bd..32933c3385e25 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -25,7 +25,6 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms
-from pandas.core.arrays import Categorical
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
@@ -33,7 +32,7 @@
 from pandas.core.groupby import base
 from pandas.core.groupby.groupby import (
     GroupBy, _apply_docs, _transform_template)
-from pandas.core.index import CategoricalIndex, Index, MultiIndex
+from pandas.core.index import Index, MultiIndex
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
@@ -834,9 +833,10 @@ def _wrap_output(self, output, index, names=None):
             return Series(output, index=index, name=name)
 
     def _wrap_aggregated_output(self, output, names=None):
-        return self._wrap_output(output=output,
-                                 index=self.grouper.result_index,
-                                 names=names)
+        result = self._wrap_output(output=output,
+                                   index=self.grouper.result_index,
+                                   names=names)
+        return self._reindex_output(result)._convert(datetime=True)
 
     def _wrap_transformed_output(self, output, names=None):
         return self._wrap_output(output=output,
@@ -856,13 +856,15 @@ def _get_index():
             return index
 
         if isinstance(values[0], dict):
-            # GH #823
+            # GH #823 #24880
             index = _get_index()
-            result = DataFrame(values, index=index).stack()
+            result = self._reindex_output(DataFrame(values, index=index))
+            dropna = self.observed  # if self.observed is False, keep all-NaN rows created while re-indexing
+            result = result.stack(dropna=dropna)
             result.name = self._selection_name
             return result
 
-        if isinstance(values[0], (Series, dict)):
+        if isinstance(values[0], Series):
             return self._concat_objects(keys, values,
                                         not_indexed_same=not_indexed_same)
         elif isinstance(values[0], DataFrame):
@@ -870,9 +872,9 @@ def _get_index():
             return self._concat_objects(keys, values,
                                         not_indexed_same=not_indexed_same)
         else:
-            # GH #6265
-            return Series(values, index=_get_index(),
-                          name=self._selection_name)
+            # GH #6265 #24880
+            result = Series(values, index=_get_index(), name=self._selection_name)
+            return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
         result = OrderedDict()
@@ -1335,7 +1337,8 @@ def _gotitem(self, key, ndim, subset=None):
             if subset is None:
                 subset = self.obj[key]
             return SeriesGroupBy(subset, selection=key,
-                                 grouper=self.grouper)
+                                 grouper=self.grouper,
+                                 observed=self.observed)
 
         raise AssertionError("invalid ndim for _gotitem")
 
@@ -1407,69 +1410,6 @@ def _wrap_agged_blocks(self, items, blocks):
 
         return self._reindex_output(result)._convert(datetime=True)
 
-    def _reindex_output(self, result):
-        """
-        If we have categorical groupers, then we want to make sure that
-        we have a fully reindex-output to the levels. These may have not
-        participated in the groupings (e.g. may have all been
-        nan groups);
-
-        This can re-expand the output space
-        """
-
-        # we need to re-expand the output space to accomodate all values
-        # whether observed or not in the cartesian product of our groupes
-        groupings = self.grouper.groupings
-        if groupings is None:
-            return result
-        elif len(groupings) == 1:
-            return result
-
-        # if we only care about the observed values
-        # we are done
-        elif self.observed:
-            return result
-
-        # reindexing only applies to a Categorical grouper
-        elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex))
-                     for ping in groupings):
-            return result
-
-        levels_list = [ping.group_index for ping in groupings]
-        index, _ = MultiIndex.from_product(
-            levels_list, names=self.grouper.names).sortlevel()
-
-        if self.as_index:
-            d = {self.obj._get_axis_name(self.axis): index, 'copy': False}
-            return result.reindex(**d)
-
-        # GH 13204
-        # Here, the categorical in-axis groupers, which need to be fully
-        # expanded, are columns in `result`. An idea is to do:
-        # result = result.set_index(self.grouper.names)
-        #                .reindex(index).reset_index()
-        # but special care has to be taken because of possible not-in-axis
-        # groupers.
-        # So, we manually select and drop the in-axis grouper columns,
-        # reindex `result`, and then reset the in-axis grouper columns.
-
-        # Select in-axis groupers
-        in_axis_grps = ((i, ping.name) for (i, ping)
-                        in enumerate(groupings) if ping.in_axis)
-        g_nums, g_names = zip(*in_axis_grps)
-
-        result = result.drop(labels=list(g_names), axis=1)
-
-        # Set a temp index and reindex (possibly expanding)
-        result = result.set_index(self.grouper.result_index
-                                  ).reindex(index, copy=False)
-
-        # Reset in-axis grouper columns
-        # (using level numbers `g_nums` because level names may not be unique)
-        result = result.reset_index(level=g_nums)
-
-        return result.reset_index(drop=True)
-
     def _iterate_column_groupbys(self):
         for i, colname in enumerate(self._selected_obj.columns):
             yield colname, SeriesGroupBy(self._selected_obj.iloc[:, i],
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4e9e3b4963b6d..3d8716def20fb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -17,6 +17,7 @@ class providing the base-class of operations.
 
 import numpy as np
 
+from pandas.core.arrays import Categorical
 from pandas._config.config import option_context
 
 from pandas._libs import Timestamp
@@ -42,7 +43,7 @@ class providing the base-class of operations.
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base
-from pandas.core.index import Index, MultiIndex
+from pandas.core.index import Index, CategoricalIndex, MultiIndex
 from pandas.core.series import Series
 from pandas.core.sorting import get_group_index_sorter
 
@@ -2301,6 +2302,69 @@ def tail(self, n=5):
         mask = self._cumcount_array(ascending=False) < n
         return self._selected_obj[mask]
 
+    def _reindex_output(self, result):
+        """
+        If we have categorical groupers, then we want to make sure that
+        we have a fully reindex-output to the levels. These may have not
+        participated in the groupings (e.g. may have all been
+        nan groups);
+
+        This can re-expand the output space
+        """
+
+        # we need to re-expand the output space to accomodate all values
+        # whether observed or not in the cartesian product of our groupes
+        groupings = self.grouper.groupings
+        if groupings is None:
+            return result
+        elif len(groupings) == 1:
+            return result
+
+        # if we only care about the observed values
+        # we are done
+        elif self.observed:
+            return result
+
+        # reindexing only applies to a Categorical grouper
+        elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex))
+                     for ping in groupings):
+            return result
+
+        levels_list = [ping.group_index for ping in groupings]
+        index, _ = MultiIndex.from_product(
+            levels_list, names=self.grouper.names).sortlevel()
+
+        if self.as_index:
+            d = {self.obj._get_axis_name(self.axis): index, 'copy': False}
+            return result.reindex(**d)
+
+        # GH 13204
+        # Here, the categorical in-axis groupers, which need to be fully
+        # expanded, are columns in `result`. An idea is to do:
+        # result = result.set_index(self.grouper.names)
+        #                .reindex(index).reset_index()
+        # but special care has to be taken because of possible not-in-axis
+        # groupers.
+        # So, we manually select and drop the in-axis grouper columns,
+        # reindex `result`, and then reset the in-axis grouper columns.
+
+        # Select in-axis groupers
+        in_axis_grps = ((i, ping.name) for (i, ping)
+                        in enumerate(groupings) if ping.in_axis)
+        g_nums, g_names = zip(*in_axis_grps)
+
+        result = result.drop(labels=list(g_names), axis=1)
+
+        # Set a temp index and reindex (possibly expanding)
+        result = result.set_index(self.grouper.result_index
+                                  ).reindex(index, copy=False)
+
+        # Reset in-axis grouper columns
+        # (using level numbers `g_nums` because level names may not be unique)
+        result = result.reset_index(level=g_nums)
+
+        return result.reset_index(drop=True)
+
 
 GroupBy._add_numeric_operations()
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 2511063110f92..6aa07eac681bf 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -10,7 +10,7 @@
 
 import pandas as pd
 from pandas import (
-    DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv)
+    DataFrame, Index, MultiIndex, CategoricalIndex, Series, Timestamp, date_range, read_csv)
 import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas.util.testing import (
@@ -1736,3 +1736,60 @@ def test_groupby_multiindex_series_keys_len_equal_group_axis():
     expected = pd.Series([3], index=ei)
 
     assert_series_equal(result, expected)
+
+
+def test_groupby_observed():
+    # GH 24880
+    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
+                    'b': ['a', 'a', 'b', 'a'],
+                    'c': [1, 2, 3, 4]})
+    df['a'] = df['a'].astype('category')
+    df['b'] = df['b'].astype('category')
+
+    # test .agg and .apply when observed == False
+    levels = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+              CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
+    index, _ = MultiIndex.from_product(levels, names=['a', 'b']).sortlevel()
+    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
+    actual_agg = df.groupby(['a', 'b']).c.agg(sum)
+    actual_apply = df.groupby(['a', 'b']).c.apply(sum)
+    assert_series_equal(expected, actual_agg)
+    assert_series_equal(expected, actual_apply)
+
+    # test .agg when observed == True
+    index = MultiIndex.from_frame(df[['a', 'b']].drop_duplicates())
+    expected = pd.Series([3, 3, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.agg(sum)
+    assert_series_equal(expected, actual)
+
+    # test .apply when observed == True
+    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')], names=('a', 'b'))
+    expected = pd.Series([3, 3, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.apply(sum)
+    assert_series_equal(expected, actual)
+
+
+def test_groupby_observed_apply_lambda_returns_dict():
+    # GH 24880
+    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
+                    'b': ['a', 'a', 'b', 'a'],
+                    'c': [1, 2, 3, 4]})
+    df['a'] = df['a'].astype('category')
+    df['b'] = df['b'].astype('category')
+
+    # observed == False
+    levels = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+              CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
+              Index(['min', 'max'])]
+    index, _ = MultiIndex.from_product(levels, names=['a', 'b', None]).sortlevel()
+    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan], index=index, name='c')
+    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(), 'max': x.max()})
+    assert_series_equal(expected, actual)
+
+    # observed == True
+    index = MultiIndex.from_tuples([('x', 'a', 'max'), ('x', 'a', 'min'),
+                                    ('x', 'b', 'max'), ('x', 'b', 'min'),
+                                    ('y', 'a', 'max'), ('y', 'a', 'min')],names=('a', 'b', None))
+    expected = pd.Series(data=[2, 1, 3, 3, 4, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.apply(lambda x: {'min': x.min(), 'max': x.max()})
+    assert_series_equal(expected, actual)

From 2575c41727168039b7c2c62629e8a0226590176f Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Sun, 19 May 2019 21:56:47 +0100
Subject: [PATCH 02/14] Wrap long lines

---
 pandas/core/groupby/generic.py       |  9 +++++---
 pandas/tests/groupby/test_groupby.py | 34 +++++++++++++++++-----------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 32933c3385e25..dc414a588a2ce 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -859,8 +859,9 @@ def _get_index():
             # GH #823 #24880
             index = _get_index()
             result = self._reindex_output(DataFrame(values, index=index))
-            dropna = self.observed  # if self.observed is False, keep all-NaN rows created while re-indexing
-            result = result.stack(dropna=dropna)
+            # if self.observed is False,
+            # keep all-NaN rows created while re-indexing
+            result = result.stack(dropna=self.observed)
             result.name = self._selection_name
             return result
 
@@ -873,7 +874,9 @@ def _get_index():
                                         not_indexed_same=not_indexed_same)
         else:
             # GH #6265 #24880
-            result = Series(values, index=_get_index(), name=self._selection_name)
+            result = Series(data=values,
+                            index=_get_index(),
+                            name=self._selection_name)
             return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 6aa07eac681bf..3690dc7bb048d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -10,7 +10,8 @@
 
 import pandas as pd
 from pandas import (
-    DataFrame, Index, MultiIndex, CategoricalIndex, Series, Timestamp, date_range, read_csv)
+    DataFrame, Index, MultiIndex, CategoricalIndex,
+    Series, Timestamp, date_range, read_csv)
 import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas.util.testing import (
@@ -1747,9 +1748,9 @@ def test_groupby_observed():
     df['b'] = df['b'].astype('category')
 
     # test .agg and .apply when observed == False
-    levels = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-              CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
-    index, _ = MultiIndex.from_product(levels, names=['a', 'b']).sortlevel()
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
+    index, _ = MultiIndex.from_product(lvls, names=['a', 'b']).sortlevel()
     expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
     actual_agg = df.groupby(['a', 'b']).c.agg(sum)
     actual_apply = df.groupby(['a', 'b']).c.apply(sum)
@@ -1763,7 +1764,8 @@ def test_groupby_observed():
     assert_series_equal(expected, actual)
 
     # test .apply when observed == True
-    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')], names=('a', 'b'))
+    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
+                                   names=('a', 'b'))
     expected = pd.Series([3, 3, 4], index=index, name='c')
     actual = df.groupby(['a', 'b'], observed=True).c.apply(sum)
     assert_series_equal(expected, actual)
@@ -1778,18 +1780,24 @@ def test_groupby_observed_apply_lambda_returns_dict():
     df['b'] = df['b'].astype('category')
 
     # observed == False
-    levels = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-              CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
-              Index(['min', 'max'])]
-    index, _ = MultiIndex.from_product(levels, names=['a', 'b', None]).sortlevel()
-    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan], index=index, name='c')
-    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(), 'max': x.max()})
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
+            Index(['min', 'max'])]
+    index, _ = MultiIndex.from_product(lvls,
+                                       names=['a', 'b', None]).sortlevel()
+    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan],
+                         index=index,
+                         name='c')
+    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(),
+                                                       'max': x.max()})
     assert_series_equal(expected, actual)
 
     # observed == True
     index = MultiIndex.from_tuples([('x', 'a', 'max'), ('x', 'a', 'min'),
                                     ('x', 'b', 'max'), ('x', 'b', 'min'),
-                                    ('y', 'a', 'max'), ('y', 'a', 'min')],names=('a', 'b', None))
+                                    ('y', 'a', 'max'), ('y', 'a', 'min')],
+                                   names=('a', 'b', None))
     expected = pd.Series(data=[2, 1, 3, 3, 4, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.apply(lambda x: {'min': x.min(), 'max': x.max()})
+    actual = df.groupby(['a', 'b'], observed=True).c.\
+        apply(lambda x: {'min': x.min(), 'max': x.max()})
     assert_series_equal(expected, actual)

From 1c02d9fb0e893f72273846809f273171fa08be0b Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 20 May 2019 00:33:20 +0100
Subject: [PATCH 03/14] Move tests to test_categorical.py

---
 pandas/tests/groupby/test_categorical.py | 64 ++++++++++++++++++++++
 pandas/tests/groupby/test_groupby.py     | 67 +-----------------------
 2 files changed, 65 insertions(+), 66 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 112f7629d735a..0f86e360fea6a 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -963,3 +963,67 @@ def test_shift(fill_value):
                               categories=['a', 'b', 'c', 'd'], ordered=False)
     res = ct.shift(1, fill_value=fill_value)
     assert_equal(res, expected)
+
+
+def test_groupby_series_observed():
+    # GH 24880
+    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
+                    'b': ['a', 'a', 'b', 'a'],
+                    'c': [1, 2, 3, 4]})
+    df['a'] = df['a'].astype('category')
+    df['b'] = df['b'].astype('category')
+
+    # test .agg and .apply when observed == False
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
+    index, _ = MultiIndex.from_product(lvls, names=['a', 'b']).sortlevel()
+    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
+    actual_agg = df.groupby(['a', 'b']).c.agg(sum)
+    actual_apply = df.groupby(['a', 'b']).c.apply(sum)
+    assert_series_equal(expected, actual_agg)
+    assert_series_equal(expected, actual_apply)
+
+    # test .agg when observed == True
+    index = MultiIndex.from_frame(df[['a', 'b']].drop_duplicates())
+    expected = pd.Series([3, 3, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.agg(sum)
+    assert_series_equal(expected, actual)
+
+    # test .apply when observed == True
+    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
+                                   names=('a', 'b'))
+    expected = pd.Series([3, 3, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.apply(sum)
+    assert_series_equal(expected, actual)
+
+
+def test_groupby_series_observed_apply_dict():
+    # GH 24880
+    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
+                    'b': ['a', 'a', 'b', 'a'],
+                    'c': [1, 2, 3, 4]})
+    df['a'] = df['a'].astype('category')
+    df['b'] = df['b'].astype('category')
+
+    # observed == False
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
+            Index(['min', 'max'])]
+    index, _ = MultiIndex.from_product(lvls,
+                                       names=['a', 'b', None]).sortlevel()
+    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan],
+                         index=index,
+                         name='c')
+    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(),
+                                                       'max': x.max()})
+    assert_series_equal(expected, actual)
+
+    # observed == True
+    index = MultiIndex.from_tuples([('x', 'a', 'max'), ('x', 'a', 'min'),
+                                    ('x', 'b', 'max'), ('x', 'b', 'min'),
+                                    ('y', 'a', 'max'), ('y', 'a', 'min')],
+                                   names=('a', 'b', None))
+    expected = pd.Series(data=[2, 1, 3, 3, 4, 4], index=index, name='c')
+    actual = df.groupby(['a', 'b'], observed=True).c.\
+        apply(lambda x: {'min': x.min(), 'max': x.max()})
+    assert_series_equal(expected, actual)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3690dc7bb048d..2511063110f92 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -10,8 +10,7 @@
 
 import pandas as pd
 from pandas import (
-    DataFrame, Index, MultiIndex, CategoricalIndex,
-    Series, Timestamp, date_range, read_csv)
+    DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv)
 import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas.util.testing import (
@@ -1737,67 +1736,3 @@ def test_groupby_multiindex_series_keys_len_equal_group_axis():
     expected = pd.Series([3], index=ei)
 
     assert_series_equal(result, expected)
-
-
-def test_groupby_observed():
-    # GH 24880
-    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
-                    'b': ['a', 'a', 'b', 'a'],
-                    'c': [1, 2, 3, 4]})
-    df['a'] = df['a'].astype('category')
-    df['b'] = df['b'].astype('category')
-
-    # test .agg and .apply when observed == False
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
-    index, _ = MultiIndex.from_product(lvls, names=['a', 'b']).sortlevel()
-    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
-    actual_agg = df.groupby(['a', 'b']).c.agg(sum)
-    actual_apply = df.groupby(['a', 'b']).c.apply(sum)
-    assert_series_equal(expected, actual_agg)
-    assert_series_equal(expected, actual_apply)
-
-    # test .agg when observed == True
-    index = MultiIndex.from_frame(df[['a', 'b']].drop_duplicates())
-    expected = pd.Series([3, 3, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.agg(sum)
-    assert_series_equal(expected, actual)
-
-    # test .apply when observed == True
-    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
-                                   names=('a', 'b'))
-    expected = pd.Series([3, 3, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.apply(sum)
-    assert_series_equal(expected, actual)
-
-
-def test_groupby_observed_apply_lambda_returns_dict():
-    # GH 24880
-    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
-                    'b': ['a', 'a', 'b', 'a'],
-                    'c': [1, 2, 3, 4]})
-    df['a'] = df['a'].astype('category')
-    df['b'] = df['b'].astype('category')
-
-    # observed == False
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
-            Index(['min', 'max'])]
-    index, _ = MultiIndex.from_product(lvls,
-                                       names=['a', 'b', None]).sortlevel()
-    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan],
-                         index=index,
-                         name='c')
-    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(),
-                                                       'max': x.max()})
-    assert_series_equal(expected, actual)
-
-    # observed == True
-    index = MultiIndex.from_tuples([('x', 'a', 'max'), ('x', 'a', 'min'),
-                                    ('x', 'b', 'max'), ('x', 'b', 'min'),
-                                    ('y', 'a', 'max'), ('y', 'a', 'min')],
-                                   names=('a', 'b', None))
-    expected = pd.Series(data=[2, 1, 3, 3, 4, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.\
-        apply(lambda x: {'min': x.min(), 'max': x.max()})
-    assert_series_equal(expected, actual)

From 0e9f4737f19e6e5ab538481781fc9b371297173b Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 20 May 2019 13:46:59 +0100
Subject: [PATCH 04/14] Parametrize tests for 'observed' kwarg on
 SeriesGroupBy

---
 pandas/tests/groupby/conftest.py         | 48 +++++++++++++-
 pandas/tests/groupby/test_categorical.py | 79 +++++++-----------------
 2 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index cb4fe511651ee..ca0ea5bc49d27 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, MultiIndex
+from pandas import DataFrame, CategoricalIndex, Index, MultiIndex
 from pandas.util import testing as tm
 
 
@@ -76,3 +76,49 @@ def three_group():
                       'D': np.random.randn(11),
                       'E': np.random.randn(11),
                       'F': np.random.randn(11)})
+
+
+@pytest.fixture
+def df_cat():
+    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
+                    'b': ['a', 'a', 'b', 'a'],
+                    'c': [1, 2, 3, 4]})
+    df['a'] = df['a'].astype('category')
+    df['b'] = df['b'].astype('category')
+    return df
+
+
+@pytest.fixture
+def multi_index_cat_complete():
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
+    index = MultiIndex.from_product(lvls, names=['a', 'b'])
+    return index
+
+
+@pytest.fixture
+def multi_index_cat_partial(df_cat):
+    return MultiIndex.from_frame(df_cat[['a', 'b']].drop_duplicates())
+
+
+@pytest.fixture
+def multi_index_non_cat_partial():
+    return MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
+                                  names=('a', 'b'))
+
+
+@pytest.fixture
+def multi_index_cat_compl_dict():
+    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
+            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
+            Index(['min', 'max'])]
+    index = MultiIndex.from_product(lvls, names=['a', 'b', None])
+    return index
+
+
+@pytest.fixture
+def multi_index_non_cat_partial_dict():
+    return MultiIndex.from_tuples([('x', 'a', 'min'), ('x', 'a', 'max'),
+                                   ('x', 'b', 'min'), ('x', 'b', 'max'),
+                                   ('y', 'a', 'min'), ('y', 'a', 'max')],
+                                  names=('a', 'b', None))
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 0f86e360fea6a..292e606390f34 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from collections import OrderedDict
 
 import numpy as np
 import pytest
@@ -965,65 +966,31 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-def test_groupby_series_observed():
+@pytest.mark.parametrize("observed, index, op, data", [
+    (True, 'multi_index_cat_partial', 'agg', [3, 3, 4]),
+    (True, 'multi_index_non_cat_partial', 'apply', [3, 3, 4]),
+    (False, 'multi_index_cat_complete', 'agg', [3, 3, 4, np.nan]),
+    (False, 'multi_index_cat_complete', 'apply', [3, 3, 4, np.nan]),
+    (None, 'multi_index_cat_complete', 'agg', [3, 3, 4, np.nan]),
+    (None, 'multi_index_cat_complete', 'apply', [3, 3, 4, np.nan])])
+def test_groupby_series_observed(request, df_cat, observed, index, op, data):
     # GH 24880
-    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
-                    'b': ['a', 'a', 'b', 'a'],
-                    'c': [1, 2, 3, 4]})
-    df['a'] = df['a'].astype('category')
-    df['b'] = df['b'].astype('category')
-
-    # test .agg and .apply when observed == False
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
-    index, _ = MultiIndex.from_product(lvls, names=['a', 'b']).sortlevel()
-    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
-    actual_agg = df.groupby(['a', 'b']).c.agg(sum)
-    actual_apply = df.groupby(['a', 'b']).c.apply(sum)
-    assert_series_equal(expected, actual_agg)
-    assert_series_equal(expected, actual_apply)
-
-    # test .agg when observed == True
-    index = MultiIndex.from_frame(df[['a', 'b']].drop_duplicates())
-    expected = pd.Series([3, 3, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.agg(sum)
-    assert_series_equal(expected, actual)
-
-    # test .apply when observed == True
-    index = MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
-                                   names=('a', 'b'))
-    expected = pd.Series([3, 3, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.apply(sum)
+    index = request.getfixturevalue(index)
+    expected = pd.Series(data=data, index=index, name='c')
+    grouped = df_cat.groupby(['a', 'b'], observed=observed).c
+    actual = getattr(grouped, op)(sum)
     assert_series_equal(expected, actual)
 
 
-def test_groupby_series_observed_apply_dict():
+@pytest.mark.parametrize("observed, index, data", [
+    (True, 'multi_index_non_cat_partial_dict', [1, 2, 3, 3, 4, 4]),
+    (False, 'multi_index_cat_compl_dict', [1, 2, 3, 3, 4, 4, np.nan, np.nan]),
+    (None, 'multi_index_cat_compl_dict', [1, 2, 3, 3, 4, 4, np.nan, np.nan])])
+def test_groupby_series_observed_apply_dict(request, df_cat, observed, index,
+                                            data):
     # GH 24880
-    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
-                    'b': ['a', 'a', 'b', 'a'],
-                    'c': [1, 2, 3, 4]})
-    df['a'] = df['a'].astype('category')
-    df['b'] = df['b'].astype('category')
-
-    # observed == False
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
-            Index(['min', 'max'])]
-    index, _ = MultiIndex.from_product(lvls,
-                                       names=['a', 'b', None]).sortlevel()
-    expected = pd.Series(data=[2, 1, 3, 3, 4, 4, np.nan, np.nan],
-                         index=index,
-                         name='c')
-    actual = df.groupby(['a', 'b']).c.apply(lambda x: {'min': x.min(),
-                                                       'max': x.max()})
-    assert_series_equal(expected, actual)
-
-    # observed == True
-    index = MultiIndex.from_tuples([('x', 'a', 'max'), ('x', 'a', 'min'),
-                                    ('x', 'b', 'max'), ('x', 'b', 'min'),
-                                    ('y', 'a', 'max'), ('y', 'a', 'min')],
-                                   names=('a', 'b', None))
-    expected = pd.Series(data=[2, 1, 3, 3, 4, 4], index=index, name='c')
-    actual = df.groupby(['a', 'b'], observed=True).c.\
-        apply(lambda x: {'min': x.min(), 'max': x.max()})
+    index = request.getfixturevalue(index)
+    expected = pd.Series(data=data, index=index, name='c')
+    actual = df_cat.groupby(['a', 'b'], observed=observed).c.\
+        apply(lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
     assert_series_equal(expected, actual)

From cd481ad5c016d7fd919d77d96ea7ab120d3a65c2 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 20 May 2019 18:09:19 +0100
Subject: [PATCH 05/14] Split test_groupby_series_observed to utilize fixtures
 better; Sort imports

---
 pandas/tests/groupby/conftest.py         |  2 +-
 pandas/tests/groupby/test_categorical.py | 40 ++++++++++++++----------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index ca0ea5bc49d27..42c0080bf60e6 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, CategoricalIndex, Index, MultiIndex
+from pandas import CategoricalIndex, DataFrame, Index, MultiIndex
 from pandas.util import testing as tm
 
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 292e606390f34..dfa4872b9daf7 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1,5 +1,5 @@
-from datetime import datetime
 from collections import OrderedDict
+from datetime import datetime
 
 import numpy as np
 import pytest
@@ -966,20 +966,28 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-@pytest.mark.parametrize("observed, index, op, data", [
-    (True, 'multi_index_cat_partial', 'agg', [3, 3, 4]),
-    (True, 'multi_index_non_cat_partial', 'apply', [3, 3, 4]),
-    (False, 'multi_index_cat_complete', 'agg', [3, 3, 4, np.nan]),
-    (False, 'multi_index_cat_complete', 'apply', [3, 3, 4, np.nan]),
-    (None, 'multi_index_cat_complete', 'agg', [3, 3, 4, np.nan]),
-    (None, 'multi_index_cat_complete', 'apply', [3, 3, 4, np.nan])])
-def test_groupby_series_observed(request, df_cat, observed, index, op, data):
+@pytest.mark.parametrize("index, op", [
+    ('multi_index_cat_partial', 'agg'),
+    ('multi_index_non_cat_partial', 'apply')])
+def test_groupby_series_observed_true(request, df_cat, index, op):
     # GH 24880
     index = request.getfixturevalue(index)
-    expected = pd.Series(data=data, index=index, name='c')
-    grouped = df_cat.groupby(['a', 'b'], observed=observed).c
-    actual = getattr(grouped, op)(sum)
-    assert_series_equal(expected, actual)
+    expected = pd.Series(data=[3, 3, 4], index=index, name='c')
+    grouped = df_cat.groupby(['a', 'b'], observed=True)['c']
+    result = getattr(grouped, op)(sum)
+    assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("op", ['agg', 'apply'])
+@pytest.mark.parametrize("observed", [False, None])
+def test_groupby_series_observed_false_or_none(
+        df_cat, multi_index_cat_complete, observed, op):
+    # GH 24880
+    index = multi_index_cat_complete
+    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
+    grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
+    result = getattr(grouped, op)(sum)
+    assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("observed, index, data", [
@@ -991,6 +999,6 @@ def test_groupby_series_observed_apply_dict(request, df_cat, observed, index,
     # GH 24880
     index = request.getfixturevalue(index)
     expected = pd.Series(data=data, index=index, name='c')
-    actual = df_cat.groupby(['a', 'b'], observed=observed).c.\
-        apply(lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
-    assert_series_equal(expected, actual)
+    result = df_cat.groupby(['a', 'b'], observed=observed)['c'].apply(
+        lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
+    assert_series_equal(result, expected)

From a515cafd95047ab9408064b3c67fe2c6bb44c07c Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 20 May 2019 19:47:23 +0100
Subject: [PATCH 06/14] Sort imports in core/groupby/groupby.py

---
 pandas/core/groupby/groupby.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 3d8716def20fb..75d678ae277e1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -17,7 +17,6 @@ class providing the base-class of operations.
 
 import numpy as np
 
-from pandas.core.arrays import Categorical
 from pandas._config.config import option_context
 
 from pandas._libs import Timestamp
@@ -37,13 +36,14 @@ class providing the base-class of operations.
 from pandas.api.types import (
     is_datetime64_dtype, is_integer_dtype, is_object_dtype)
 import pandas.core.algorithms as algorithms
+from pandas.core.arrays import Categorical
 from pandas.core.base import (
     DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError)
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base
-from pandas.core.index import Index, CategoricalIndex, MultiIndex
+from pandas.core.index import CategoricalIndex, Index, MultiIndex
 from pandas.core.series import Series
 from pandas.core.sorting import get_group_index_sorter
 

From ff42dd7bde58abb004214b1d53a9f5e171b31a13 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 20 May 2019 23:40:49 +0100
Subject: [PATCH 07/14] Remove too specific fixtures and adjust tests

---
 pandas/tests/groupby/conftest.py         | 42 ++--------------
 pandas/tests/groupby/test_categorical.py | 64 ++++++++++++++++--------
 2 files changed, 47 insertions(+), 59 deletions(-)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 42c0080bf60e6..3a8ca2383a091 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas import CategoricalIndex, DataFrame, Index, MultiIndex
+from pandas import DataFrame, MultiIndex
 from pandas.util import testing as tm
 
 
@@ -80,45 +80,9 @@ def three_group():
 
 @pytest.fixture
 def df_cat():
-    df = DataFrame({'a': ['x', 'x', 'x', 'y'],
-                    'b': ['a', 'a', 'b', 'a'],
+    df = DataFrame({'a': ['one', 'one', 'one', 'two'],
+                    'b': ['foo', 'foo', 'bar', 'foo'],
                     'c': [1, 2, 3, 4]})
     df['a'] = df['a'].astype('category')
     df['b'] = df['b'].astype('category')
     return df
-
-
-@pytest.fixture
-def multi_index_cat_complete():
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False)]
-    index = MultiIndex.from_product(lvls, names=['a', 'b'])
-    return index
-
-
-@pytest.fixture
-def multi_index_cat_partial(df_cat):
-    return MultiIndex.from_frame(df_cat[['a', 'b']].drop_duplicates())
-
-
-@pytest.fixture
-def multi_index_non_cat_partial():
-    return MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'a')],
-                                  names=('a', 'b'))
-
-
-@pytest.fixture
-def multi_index_cat_compl_dict():
-    lvls = [CategoricalIndex(['x', 'y'], categories=['x', 'y'], ordered=False),
-            CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=False),
-            Index(['min', 'max'])]
-    index = MultiIndex.from_product(lvls, names=['a', 'b', None])
-    return index
-
-
-@pytest.fixture
-def multi_index_non_cat_partial_dict():
-    return MultiIndex.from_tuples([('x', 'a', 'min'), ('x', 'a', 'max'),
-                                   ('x', 'b', 'min'), ('x', 'b', 'max'),
-                                   ('y', 'a', 'min'), ('y', 'a', 'max')],
-                                  names=('a', 'b', None))
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index dfa4872b9daf7..484dac298362f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -966,38 +966,62 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-@pytest.mark.parametrize("index, op", [
-    ('multi_index_cat_partial', 'agg'),
-    ('multi_index_non_cat_partial', 'apply')])
-def test_groupby_series_observed_true(request, df_cat, index, op):
+@pytest.mark.parametrize('operation', ['agg', 'apply'])
+def test_groupby_series_observed_true(df_cat, operation):
     # GH 24880
-    index = request.getfixturevalue(index)
+    index = {
+        'agg': MultiIndex.from_frame(df_cat[['a', 'b']].drop_duplicates()),
+        'apply': MultiIndex.from_tuples(
+            [tuple(grp) for grp in
+             df_cat.select_dtypes('category').drop_duplicates().values],
+            names=df_cat.select_dtypes('category'))
+    }[operation]
+
     expected = pd.Series(data=[3, 3, 4], index=index, name='c')
     grouped = df_cat.groupby(['a', 'b'], observed=True)['c']
-    result = getattr(grouped, op)(sum)
+    result = getattr(grouped, operation)(sum)
     assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("op", ['agg', 'apply'])
-@pytest.mark.parametrize("observed", [False, None])
-def test_groupby_series_observed_false_or_none(
-        df_cat, multi_index_cat_complete, observed, op):
+@pytest.mark.parametrize('operation', ['agg', 'apply'])
+@pytest.mark.parametrize('observed', [False, None])
+def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
-    index = multi_index_cat_complete
-    expected = pd.Series(data=[3, 3, 4, np.nan], index=index, name='c')
+    index, _ = MultiIndex.from_product(
+        iterables=(CategoricalIndex(data=d)
+                   for d in np.apply_along_axis(
+            np.unique, 1, df_cat.select_dtypes('category').T.values)),
+        names=df_cat.select_dtypes('category').columns).sortlevel()
+
+    expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
     grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
-    result = getattr(grouped, op)(sum)
+    result = getattr(grouped, operation)(sum)
     assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("observed, index, data", [
-    (True, 'multi_index_non_cat_partial_dict', [1, 2, 3, 3, 4, 4]),
-    (False, 'multi_index_cat_compl_dict', [1, 2, 3, 3, 4, 4, np.nan, np.nan]),
-    (None, 'multi_index_cat_compl_dict', [1, 2, 3, 3, 4, 4, np.nan, np.nan])])
-def test_groupby_series_observed_apply_dict(request, df_cat, observed, index,
-                                            data):
+@pytest.mark.parametrize("observed, data", [
+    (True, [1, 2, 3, 3, 4, 4]),
+    (False, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
+    (None, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
+def test_groupby_series_observed_apply_dict(df_cat, observed, data):
     # GH 24880
-    index = request.getfixturevalue(index)
+    index_names = df_cat.select_dtypes(
+        'category').columns.values.tolist() + [None]
+    index = {
+        True: MultiIndex.from_tuples(
+            [tuple(list(grp) + [p])
+             for grp in df_cat.select_dtypes(
+                'category').drop_duplicates().values
+             for p in ('min', 'max')],
+            names=index_names),
+        False: MultiIndex.from_product(
+            [CategoricalIndex(data=d)
+             for d in np.apply_along_axis(
+                np.unique, 1, df_cat.select_dtypes('category').T.values)
+             ] + [Index(['min', 'max'])],
+            names=index_names)
+    }[bool(observed)]
+
     expected = pd.Series(data=data, index=index, name='c')
     result = df_cat.groupby(['a', 'b'], observed=observed)['c'].apply(
         lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))

From cc0b72590989253f23e8eab606266f4fbc3d0d94 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Tue, 21 May 2019 09:33:02 +0100
Subject: [PATCH 08/14] Use literal values for indices in tests

---
 pandas/tests/groupby/test_categorical.py | 61 ++++++++++++++----------
 1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 484dac298362f..1a2b62e556981 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -970,11 +970,17 @@ def test_shift(fill_value):
 def test_groupby_series_observed_true(df_cat, operation):
     # GH 24880
     index = {
-        'agg': MultiIndex.from_frame(df_cat[['a', 'b']].drop_duplicates()),
-        'apply': MultiIndex.from_tuples(
-            [tuple(grp) for grp in
-             df_cat.select_dtypes('category').drop_duplicates().values],
-            names=df_cat.select_dtypes('category'))
+        'agg': MultiIndex(levels=[CategoricalIndex(['one', 'two'],
+                                                   categories=['one', 'two'],
+                                                   ordered=False),
+                                  CategoricalIndex(['bar', 'foo'],
+                                                   categories=['bar', 'foo'],
+                                                   ordered=False)],
+                          codes=[[0, 0, 1], [1, 0, 1]],
+                          names=['a', 'b']),
+        'apply': MultiIndex(levels=[['one', 'two'], ['bar', 'foo']],
+                            codes=[[0, 0, 1], [1, 0, 1]],
+                            names=['a', 'b'])
     }[operation]
 
     expected = pd.Series(data=[3, 3, 4], index=index, name='c')
@@ -987,11 +993,14 @@ def test_groupby_series_observed_true(df_cat, operation):
 @pytest.mark.parametrize('observed', [False, None])
 def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
-    index, _ = MultiIndex.from_product(
-        iterables=(CategoricalIndex(data=d)
-                   for d in np.apply_along_axis(
-            np.unique, 1, df_cat.select_dtypes('category').T.values)),
-        names=df_cat.select_dtypes('category').columns).sortlevel()
+    index, _ = MultiIndex(levels=[CategoricalIndex(['one', 'two'],
+                                                   categories=['one', 'two'],
+                                                   ordered=False),
+                                  CategoricalIndex(['bar', 'foo'],
+                                                   categories=['bar', 'foo'],
+                                                   ordered=False)],
+                          codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                          names=['a', 'b']).sortlevel()
 
     expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
     grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
@@ -1005,21 +1014,25 @@ def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     (None, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
 def test_groupby_series_observed_apply_dict(df_cat, observed, data):
     # GH 24880
-    index_names = df_cat.select_dtypes(
-        'category').columns.values.tolist() + [None]
     index = {
-        True: MultiIndex.from_tuples(
-            [tuple(list(grp) + [p])
-             for grp in df_cat.select_dtypes(
-                'category').drop_duplicates().values
-             for p in ('min', 'max')],
-            names=index_names),
-        False: MultiIndex.from_product(
-            [CategoricalIndex(data=d)
-             for d in np.apply_along_axis(
-                np.unique, 1, df_cat.select_dtypes('category').T.values)
-             ] + [Index(['min', 'max'])],
-            names=index_names)
+        True: MultiIndex(levels=[['one', 'two'],
+                                 ['bar', 'foo'],
+                                 ['max', 'min']],
+                         codes=[[0, 0, 0, 0, 1, 1],
+                                [1, 1, 0, 0, 1, 1],
+                                [1, 0, 1, 0, 1, 0]],
+                         names=['a', 'b', None]),
+        False: MultiIndex(levels=[CategoricalIndex(['one', 'two'],
+                                                   categories=['one', 'two'],
+                                                   ordered=False),
+                                  CategoricalIndex(['bar', 'foo'],
+                                                   categories=['bar', 'foo'],
+                                                   ordered=False),
+                                  Index(['max', 'min'])],
+                          codes=[[0, 0, 0, 0, 1, 1, 1, 1],
+                                 [0, 0, 1, 1, 0, 0, 1, 1],
+                                 [1, 0, 1, 0, 1, 0, 1, 0]],
+                          names=['a', 'b', None])
     }[bool(observed)]
 
     expected = pd.Series(data=data, index=index, name='c')

From e4fda22837922e900947af3e7ffb1a2e195fb5f9 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Wed, 22 May 2019 09:48:23 +0100
Subject: [PATCH 09/14] Use MultiIndex.from_* to construct indices in tests

---
 pandas/tests/groupby/test_categorical.py | 78 +++++++++---------------
 1 file changed, 28 insertions(+), 50 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 1a2b62e556981..8e58a6a45be8b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -966,23 +966,14 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-@pytest.mark.parametrize('operation', ['agg', 'apply'])
-def test_groupby_series_observed_true(df_cat, operation):
+@pytest.mark.parametrize('operation, index', [
+    ('agg', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
+                                                'b': ['foo', 'bar', 'foo']},
+                                               dtype='category'))),
+    ('apply', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
+                                                  'b': ['foo', 'bar', 'foo']})))])
+def test_groupby_series_observed_true(df_cat, operation, index):
     # GH 24880
-    index = {
-        'agg': MultiIndex(levels=[CategoricalIndex(['one', 'two'],
-                                                   categories=['one', 'two'],
-                                                   ordered=False),
-                                  CategoricalIndex(['bar', 'foo'],
-                                                   categories=['bar', 'foo'],
-                                                   ordered=False)],
-                          codes=[[0, 0, 1], [1, 0, 1]],
-                          names=['a', 'b']),
-        'apply': MultiIndex(levels=[['one', 'two'], ['bar', 'foo']],
-                            codes=[[0, 0, 1], [1, 0, 1]],
-                            names=['a', 'b'])
-    }[operation]
-
     expected = pd.Series(data=[3, 3, 4], index=index, name='c')
     grouped = df_cat.groupby(['a', 'b'], observed=True)['c']
     result = getattr(grouped, operation)(sum)
@@ -993,14 +984,10 @@ def test_groupby_series_observed_true(df_cat, operation):
 @pytest.mark.parametrize('observed', [False, None])
 def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
-    index, _ = MultiIndex(levels=[CategoricalIndex(['one', 'two'],
-                                                   categories=['one', 'two'],
-                                                   ordered=False),
-                                  CategoricalIndex(['bar', 'foo'],
-                                                   categories=['bar', 'foo'],
-                                                   ordered=False)],
-                          codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
-                          names=['a', 'b']).sortlevel()
+    index, _ = MultiIndex.from_product(
+        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
+         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False)],
+        names=['a', 'b']).sortlevel()
 
     expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
     grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
@@ -1008,33 +995,24 @@ def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("observed, data", [
-    (True, [1, 2, 3, 3, 4, 4]),
-    (False, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
-    (None, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
-def test_groupby_series_observed_apply_dict(df_cat, observed, data):
+@pytest.mark.parametrize("observed, index, data", [
+    (True, MultiIndex.from_tuples(
+        [('one', 'foo', 'min'), ('one', 'foo', 'max'),
+         ('one', 'bar', 'min'), ('one', 'bar', 'max'),
+         ('two', 'foo', 'min'), ('two', 'foo', 'max')],
+        names=['a', 'b', None]), [1, 2, 3, 3, 4, 4]),
+    (False, MultiIndex.from_product(
+        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
+         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
+         Index(['min', 'max'])],
+        names=['a', 'b', None]),  [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
+    (None, MultiIndex.from_product(
+        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
+         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
+         Index(['min', 'max'])],
+        names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
+def test_groupby_series_observed_apply_dict(df_cat, observed, index, data):
     # GH 24880
-    index = {
-        True: MultiIndex(levels=[['one', 'two'],
-                                 ['bar', 'foo'],
-                                 ['max', 'min']],
-                         codes=[[0, 0, 0, 0, 1, 1],
-                                [1, 1, 0, 0, 1, 1],
-                                [1, 0, 1, 0, 1, 0]],
-                         names=['a', 'b', None]),
-        False: MultiIndex(levels=[CategoricalIndex(['one', 'two'],
-                                                   categories=['one', 'two'],
-                                                   ordered=False),
-                                  CategoricalIndex(['bar', 'foo'],
-                                                   categories=['bar', 'foo'],
-                                                   ordered=False),
-                                  Index(['max', 'min'])],
-                          codes=[[0, 0, 0, 0, 1, 1, 1, 1],
-                                 [0, 0, 1, 1, 0, 0, 1, 1],
-                                 [1, 0, 1, 0, 1, 0, 1, 0]],
-                          names=['a', 'b', None])
-    }[bool(observed)]
-
     expected = pd.Series(data=data, index=index, name='c')
     result = df_cat.groupby(['a', 'b'], observed=observed)['c'].apply(
         lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))

From 8cfa4a13a46377658cb6bae6bb81f2be745e6be5 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Wed, 22 May 2019 09:58:22 +0100
Subject: [PATCH 10/14] Wrap long lines

---
 pandas/tests/groupby/test_categorical.py | 29 +++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 8e58a6a45be8b..4b6af5da403dc 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -971,7 +971,8 @@ def test_shift(fill_value):
                                                 'b': ['foo', 'bar', 'foo']},
                                                dtype='category'))),
     ('apply', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
-                                                  'b': ['foo', 'bar', 'foo']})))])
+                                                  'b': ['foo', 'bar', 'foo']}))
+     )])
 def test_groupby_series_observed_true(df_cat, operation, index):
     # GH 24880
     expected = pd.Series(data=[3, 3, 4], index=index, name='c')
@@ -985,8 +986,12 @@ def test_groupby_series_observed_true(df_cat, operation, index):
 def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
     index, _ = MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
-         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False)],
+        [CategoricalIndex(['one', 'two'],
+                          categories=['one', 'two'],
+                          ordered=False),
+         CategoricalIndex(['bar', 'foo'],
+                          categories=['bar', 'foo'],
+                          ordered=False)],
         names=['a', 'b']).sortlevel()
 
     expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
@@ -1002,13 +1007,21 @@ def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
          ('two', 'foo', 'min'), ('two', 'foo', 'max')],
         names=['a', 'b', None]), [1, 2, 3, 3, 4, 4]),
     (False, MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
-         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
+        [CategoricalIndex(['one', 'two'],
+                          categories=['one', 'two'],
+                          ordered=False),
+         CategoricalIndex(['bar', 'foo'],
+                          categories=['bar', 'foo'],
+                          ordered=False),
          Index(['min', 'max'])],
-        names=['a', 'b', None]),  [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
+        names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
     (None, MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
-         CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
+        [CategoricalIndex(['one', 'two'],
+                          categories=['one', 'two'],
+                          ordered=False),
+         CategoricalIndex(['bar', 'foo'],
+                          categories=['bar', 'foo'],
+                          ordered=False),
          Index(['min', 'max'])],
         names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
 def test_groupby_series_observed_apply_dict(df_cat, observed, index, data):

From d520952b745bdf631d7c87cc6309b5a0dec4b063 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Sun, 26 May 2019 23:45:16 +0100
Subject: [PATCH 11/14] Enhance docstring for _reindex_output

---
 pandas/core/groupby/groupby.py | 52 ++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 737e411b64e88..91bb71a1a8af7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2302,33 +2302,43 @@ def tail(self, n=5):
         mask = self._cumcount_array(ascending=False) < n
         return self._selected_obj[mask]
 
-    def _reindex_output(self, result):
+    def _reindex_output(self, output):
         """
-        If we have categorical groupers, then we want to make sure that
-        we have a fully reindex-output to the levels. These may have not
-        participated in the groupings (e.g. may have all been
-        nan groups);
+        If we have categorical groupers, then we might want to make sure that
+        we have a fully re-indexed output to the levels. This means expanding
+        the output space to accommodate all values in the cartesian product of
+        our groups, regardless of whether they were observed in the data or
+        not. Groups that were not observed are filled with missing values.
 
-        This can re-expand the output space
-        """
+        The method returns early without modifying the input if the number of
+        groupings is less than 2, ``self.observed`` is true, or none of the
+        groupers are categorical.
+
+        Parameters
+        ----------
+        output : Series or DataFrame
+            Object resulting from grouping and applying an operation.
 
-        # we need to re-expand the output space to accomodate all values
-        # whether observed or not in the cartesian product of our groupes
+        Returns
+        -------
+        Series or DataFrame
+            Object (potentially) re-indexed to include all possible groups.
+        """
         groupings = self.grouper.groupings
         if groupings is None:
-            return result
+            return output
         elif len(groupings) == 1:
-            return result
+            return output
 
         # if we only care about the observed values
         # we are done
         elif self.observed:
-            return result
+            return output
 
         # reindexing only applies to a Categorical grouper
         elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex))
                      for ping in groupings):
-            return result
+            return output
 
         levels_list = [ping.group_index for ping in groupings]
         index, _ = MultiIndex.from_product(
@@ -2336,34 +2346,34 @@ def _reindex_output(self, result):
 
         if self.as_index:
             d = {self.obj._get_axis_name(self.axis): index, 'copy': False}
-            return result.reindex(**d)
+            return output.reindex(**d)
 
         # GH 13204
         # Here, the categorical in-axis groupers, which need to be fully
-        # expanded, are columns in `result`. An idea is to do:
-        # result = result.set_index(self.grouper.names)
+        # expanded, are columns in `output`. An idea is to do:
+        # output = output.set_index(self.grouper.names)
         #                .reindex(index).reset_index()
         # but special care has to be taken because of possible not-in-axis
         # groupers.
         # So, we manually select and drop the in-axis grouper columns,
-        # reindex `result`, and then reset the in-axis grouper columns.
+        # reindex `output`, and then reset the in-axis grouper columns.
 
         # Select in-axis groupers
         in_axis_grps = ((i, ping.name) for (i, ping)
                         in enumerate(groupings) if ping.in_axis)
         g_nums, g_names = zip(*in_axis_grps)
 
-        result = result.drop(labels=list(g_names), axis=1)
+        output = output.drop(labels=list(g_names), axis=1)
 
         # Set a temp index and reindex (possibly expanding)
-        result = result.set_index(self.grouper.result_index
+        output = output.set_index(self.grouper.result_index
                                   ).reindex(index, copy=False)
 
         # Reset in-axis grouper columns
         # (using level numbers `g_nums` because level names may not be unique)
-        result = result.reset_index(level=g_nums)
+        output = output.reset_index(level=g_nums)
 
-        return result.reset_index(drop=True)
+        return output.reset_index(drop=True)
 
 
 GroupBy._add_numeric_operations()

From 3591dbc34c273d13bd0496308ed020d5f01d6219 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 27 May 2019 14:51:32 +0100
Subject: [PATCH 12/14] Modify tests to reuse existing fixture

---
 doc/source/whatsnew/v0.25.0.rst          |   1 +
 pandas/tests/groupby/conftest.py         |  10 --
 pandas/tests/groupby/test_categorical.py | 136 ++++++++++++-----------
 3 files changed, 75 insertions(+), 72 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 32faf7115f0fd..537cc69bf8469 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -483,6 +483,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`)
 - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
 - Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`)
+- Bug in :func:`Series.groupby` where the ``observed`` kwarg was previously ignored (:issue:`24880`)
 - Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`)
 - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`)
 - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`)
diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 3a8ca2383a091..cb4fe511651ee 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -76,13 +76,3 @@ def three_group():
                       'D': np.random.randn(11),
                       'E': np.random.randn(11),
                       'F': np.random.randn(11)})
-
-
-@pytest.fixture
-def df_cat():
-    df = DataFrame({'a': ['one', 'one', 'one', 'two'],
-                    'b': ['foo', 'foo', 'bar', 'foo'],
-                    'c': [1, 2, 3, 4]})
-    df['a'] = df['a'].astype('category')
-    df['b'] = df['b'].astype('category')
-    return df
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 4b6af5da403dc..0b7bc5dc0efb9 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -966,67 +966,79 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-@pytest.mark.parametrize('operation, index', [
-    ('agg', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
-                                                'b': ['foo', 'bar', 'foo']},
-                                               dtype='category'))),
-    ('apply', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
-                                                  'b': ['foo', 'bar', 'foo']}))
-     )])
-def test_groupby_series_observed_true(df_cat, operation, index):
+class TestSeriesGroupByObservedKwarg:
     # GH 24880
-    expected = pd.Series(data=[3, 3, 4], index=index, name='c')
-    grouped = df_cat.groupby(['a', 'b'], observed=True)['c']
-    result = getattr(grouped, operation)(sum)
-    assert_series_equal(result, expected)
-
-
-@pytest.mark.parametrize('operation', ['agg', 'apply'])
-@pytest.mark.parametrize('observed', [False, None])
-def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
-    # GH 24880
-    index, _ = MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'],
-                          categories=['one', 'two'],
-                          ordered=False),
-         CategoricalIndex(['bar', 'foo'],
-                          categories=['bar', 'foo'],
-                          ordered=False)],
-        names=['a', 'b']).sortlevel()
-
-    expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
-    grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
-    result = getattr(grouped, operation)(sum)
-    assert_series_equal(result, expected)
-
 
-@pytest.mark.parametrize("observed, index, data", [
-    (True, MultiIndex.from_tuples(
-        [('one', 'foo', 'min'), ('one', 'foo', 'max'),
-         ('one', 'bar', 'min'), ('one', 'bar', 'max'),
-         ('two', 'foo', 'min'), ('two', 'foo', 'max')],
-        names=['a', 'b', None]), [1, 2, 3, 3, 4, 4]),
-    (False, MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'],
-                          categories=['one', 'two'],
-                          ordered=False),
-         CategoricalIndex(['bar', 'foo'],
-                          categories=['bar', 'foo'],
-                          ordered=False),
-         Index(['min', 'max'])],
-        names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
-    (None, MultiIndex.from_product(
-        [CategoricalIndex(['one', 'two'],
-                          categories=['one', 'two'],
-                          ordered=False),
-         CategoricalIndex(['bar', 'foo'],
-                          categories=['bar', 'foo'],
-                          ordered=False),
-         Index(['min', 'max'])],
-        names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
-def test_groupby_series_observed_apply_dict(df_cat, observed, index, data):
-    # GH 24880
-    expected = pd.Series(data=data, index=index, name='c')
-    result = df_cat.groupby(['a', 'b'], observed=observed)['c'].apply(
-        lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
-    assert_series_equal(result, expected)
+    @pytest.fixture(autouse=True)
+    def setup_method(self, df):
+        self.df = df.copy()[:4]  # leave out some groups
+        self.df['A'] = self.df['A'].astype('category')
+        self.df['B'] = self.df['B'].astype('category')
+        self.df['C'] = pd.Series([1, 2, 3, 4])
+
+    @pytest.mark.parametrize('operation, index', [
+        ('agg', MultiIndex.from_frame(
+            pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
+                          'B': ['one', 'two', 'one', 'three']
+                          }, dtype='category'))),
+        ('apply', MultiIndex.from_frame(
+            pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
+                          'B': ['one', 'two', 'one', 'three']
+                          })))])
+    def test_true(self, operation, index):
+        expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C')
+        grouped = self.df.groupby(['A', 'B'], observed=True)['C']
+        result = getattr(grouped, operation)(sum)
+        assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('operation', ['agg', 'apply'])
+    @pytest.mark.parametrize('observed', [False, None])
+    def test_false_or_none(self, observed, operation):
+        index, _ = MultiIndex.from_product(
+            [CategoricalIndex(['bar', 'foo'],
+                              categories=['bar', 'foo'],
+                              ordered=False),
+             CategoricalIndex(['one', 'three', 'two'],
+                              categories=['one', 'three', 'two'],
+                              ordered=False),
+             ],
+            names=['A', 'B']).sortlevel()
+
+        expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3],
+                             index=index, name='C')
+        grouped = self.df.groupby(['A', 'B'], observed=observed)['C']
+        result = getattr(grouped, operation)(sum)
+        assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("observed, index, data", [
+        (True, MultiIndex.from_tuples(
+            [('foo', 'one', 'min'), ('foo', 'one', 'max'),
+             ('foo', 'two', 'min'), ('foo', 'two', 'max'),
+             ('bar', 'one', 'min'), ('bar', 'one', 'max'),
+             ('bar', 'three', 'min'), ('bar', 'three', 'max')],
+            names=['A', 'B', None]), [1, 1, 3, 3, 2, 2, 4, 4]),
+        (False, MultiIndex.from_product(
+            [CategoricalIndex(['bar', 'foo'],
+                              categories=['bar', 'foo'],
+                              ordered=False),
+             CategoricalIndex(['one', 'three', 'two'],
+                              categories=['one', 'three', 'two'],
+                              ordered=False),
+             Index(['min', 'max'])],
+            names=['A', 'B', None]),
+         [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]),
+        (None, MultiIndex.from_product(
+            [CategoricalIndex(['bar', 'foo'],
+                              categories=['bar', 'foo'],
+                              ordered=False),
+             CategoricalIndex(['one', 'three', 'two'],
+                              categories=['one', 'three', 'two'],
+                              ordered=False),
+             Index(['min', 'max'])],
+            names=['A', 'B', None]),
+         [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])])
+    def test_apply_dict(self, observed, index, data):
+        expected = pd.Series(data=data, index=index, name='C')
+        result = self.df.groupby(['A', 'B'], observed=observed)['C'].apply(
+            lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
+        assert_series_equal(result, expected)

From d5c9c40c92a3cb7d32b5ef8a02dc944cfd936ab6 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Mon, 27 May 2019 21:57:03 +0100
Subject: [PATCH 13/14] Refactor tests from a class to stand-alone functions

---
 pandas/tests/groupby/test_categorical.py | 139 +++++++++++------------
 1 file changed, 65 insertions(+), 74 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 0b7bc5dc0efb9..f3778c4289558 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -966,79 +966,70 @@ def test_shift(fill_value):
     assert_equal(res, expected)
 
 
-class TestSeriesGroupByObservedKwarg:
+@pytest.fixture
+def df_cat(df):
+    df_cat = df.copy()[:4]  # leave out some groups
+    df_cat['A'] = df_cat['A'].astype('category')
+    df_cat['B'] = df_cat['B'].astype('category')
+    df_cat['C'] = pd.Series([1, 2, 3, 4])
+    yield df_cat
+
+
+@pytest.mark.parametrize('operation, index', [
+    ('agg', MultiIndex.from_frame(
+        pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
+                      'B': ['one', 'two', 'one', 'three']
+                      }, dtype='category'))),
+    ('apply', MultiIndex.from_frame(
+        pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
+                      'B': ['one', 'two', 'one', 'three']
+                      })))])
+def test_seriesgroupby_observed_true(df_cat, operation, index):
     # GH 24880
+    expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C')
+    grouped = df_cat.groupby(['A', 'B'], observed=True)['C']
+    result = getattr(grouped, operation)(sum)
+    assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('operation', ['agg', 'apply'])
+@pytest.mark.parametrize('observed', [False, None])
+def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
+    # GH 24880
+    index, _ = MultiIndex.from_product(
+        [CategoricalIndex(['bar', 'foo'], ordered=False),
+         CategoricalIndex(['one', 'three', 'two'], ordered=False)],
+        names=['A', 'B']).sortlevel()
+
+    expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3],
+                         index=index, name='C')
+    grouped = df_cat.groupby(['A', 'B'], observed=observed)['C']
+    result = getattr(grouped, operation)(sum)
+    assert_series_equal(result, expected)
+
 
-    @pytest.fixture(autouse=True)
-    def setup_method(self, df):
-        self.df = df.copy()[:4]  # leave out some groups
-        self.df['A'] = self.df['A'].astype('category')
-        self.df['B'] = self.df['B'].astype('category')
-        self.df['C'] = pd.Series([1, 2, 3, 4])
-
-    @pytest.mark.parametrize('operation, index', [
-        ('agg', MultiIndex.from_frame(
-            pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
-                          'B': ['one', 'two', 'one', 'three']
-                          }, dtype='category'))),
-        ('apply', MultiIndex.from_frame(
-            pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
-                          'B': ['one', 'two', 'one', 'three']
-                          })))])
-    def test_true(self, operation, index):
-        expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C')
-        grouped = self.df.groupby(['A', 'B'], observed=True)['C']
-        result = getattr(grouped, operation)(sum)
-        assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize('operation', ['agg', 'apply'])
-    @pytest.mark.parametrize('observed', [False, None])
-    def test_false_or_none(self, observed, operation):
-        index, _ = MultiIndex.from_product(
-            [CategoricalIndex(['bar', 'foo'],
-                              categories=['bar', 'foo'],
-                              ordered=False),
-             CategoricalIndex(['one', 'three', 'two'],
-                              categories=['one', 'three', 'two'],
-                              ordered=False),
-             ],
-            names=['A', 'B']).sortlevel()
-
-        expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3],
-                             index=index, name='C')
-        grouped = self.df.groupby(['A', 'B'], observed=observed)['C']
-        result = getattr(grouped, operation)(sum)
-        assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("observed, index, data", [
-        (True, MultiIndex.from_tuples(
-            [('foo', 'one', 'min'), ('foo', 'one', 'max'),
-             ('foo', 'two', 'min'), ('foo', 'two', 'max'),
-             ('bar', 'one', 'min'), ('bar', 'one', 'max'),
-             ('bar', 'three', 'min'), ('bar', 'three', 'max')],
-            names=['A', 'B', None]), [1, 1, 3, 3, 2, 2, 4, 4]),
-        (False, MultiIndex.from_product(
-            [CategoricalIndex(['bar', 'foo'],
-                              categories=['bar', 'foo'],
-                              ordered=False),
-             CategoricalIndex(['one', 'three', 'two'],
-                              categories=['one', 'three', 'two'],
-                              ordered=False),
-             Index(['min', 'max'])],
-            names=['A', 'B', None]),
-         [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]),
-        (None, MultiIndex.from_product(
-            [CategoricalIndex(['bar', 'foo'],
-                              categories=['bar', 'foo'],
-                              ordered=False),
-             CategoricalIndex(['one', 'three', 'two'],
-                              categories=['one', 'three', 'two'],
-                              ordered=False),
-             Index(['min', 'max'])],
-            names=['A', 'B', None]),
-         [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])])
-    def test_apply_dict(self, observed, index, data):
-        expected = pd.Series(data=data, index=index, name='C')
-        result = self.df.groupby(['A', 'B'], observed=observed)['C'].apply(
-            lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
-        assert_series_equal(result, expected)
+@pytest.mark.parametrize("observed, index, data", [
+    (True, MultiIndex.from_tuples(
+        [('foo', 'one', 'min'), ('foo', 'one', 'max'),
+         ('foo', 'two', 'min'), ('foo', 'two', 'max'),
+         ('bar', 'one', 'min'), ('bar', 'one', 'max'),
+         ('bar', 'three', 'min'), ('bar', 'three', 'max')],
+        names=['A', 'B', None]), [1, 1, 3, 3, 2, 2, 4, 4]),
+    (False, MultiIndex.from_product(
+        [CategoricalIndex(['bar', 'foo'], ordered=False),
+         CategoricalIndex(['one', 'three', 'two'], ordered=False),
+         Index(['min', 'max'])],
+        names=['A', 'B', None]),
+     [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3]),
+    (None, MultiIndex.from_product(
+        [CategoricalIndex(['bar', 'foo'], ordered=False),
+         CategoricalIndex(['one', 'three', 'two'], ordered=False),
+         Index(['min', 'max'])],
+        names=['A', 'B', None]),
+     [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])])
+def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
+    # GH 24880
+    expected = pd.Series(data=data, index=index, name='C')
+    result = df_cat.groupby(['A', 'B'], observed=observed)['C'].apply(
+        lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
+    assert_series_equal(result, expected)

From ad16db89134d82a6383d31abeb9a5558e11b46c1 Mon Sep 17 00:00:00 2001
From: krsnik93 <ivankrsnik93@gmail.com>
Date: Tue, 28 May 2019 01:48:21 +0100
Subject: [PATCH 14/14] Simplify a test, add a docstring for the fixture and
 drop pd.* prefix for already imported names

---
 pandas/tests/groupby/test_categorical.py | 230 ++++++++++++-----------
 1 file changed, 122 insertions(+), 108 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index f3778c4289558..f24fa0daa5b18 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -26,7 +26,7 @@ def f(a):
                                        ordered=a.ordered)
         return a
 
-    index = pd.MultiIndex.from_product(map(f, args), names=names)
+    index = MultiIndex.from_product(map(f, args), names=names)
     return result.reindex(index).sort_index()
 
 
@@ -190,7 +190,7 @@ def test_level_get_group(observed):
     # GH15155
     df = DataFrame(data=np.arange(2, 22, 2),
                    index=MultiIndex(
-                       levels=[pd.CategoricalIndex(["a", "b"]), range(10)],
+                       levels=[CategoricalIndex(["a", "b"]), range(10)],
                        codes=[[0] * 5 + [1] * 5, range(10)],
                        names=["Index1", "Index2"]))
     g = df.groupby(level=["Index1"], observed=observed)
@@ -198,7 +198,7 @@ def test_level_get_group(observed):
     # expected should equal test.loc[["a"]]
     # GH15166
     expected = DataFrame(data=np.arange(2, 12, 2),
-                         index=pd.MultiIndex(levels=[pd.CategoricalIndex(
+                         index=MultiIndex(levels=[CategoricalIndex(
                              ["a", "b"]), range(5)],
         codes=[[0] * 5, range(5)],
         names=["Index1", "Index2"]))
@@ -266,7 +266,7 @@ def test_observed(observed):
 
     # multiple groupers with a non-cat
     gb = df.groupby(['A', 'B', 'C'], observed=observed)
-    exp_index = pd.MultiIndex.from_arrays(
+    exp_index = MultiIndex.from_arrays(
         [cat1, cat2, ['foo', 'bar'] * 2],
         names=['A', 'B', 'C'])
     expected = DataFrame({'values': Series(
@@ -281,7 +281,7 @@ def test_observed(observed):
     tm.assert_frame_equal(result, expected)
 
     gb = df.groupby(['A', 'B'], observed=observed)
-    exp_index = pd.MultiIndex.from_arrays(
+    exp_index = MultiIndex.from_arrays(
         [cat1, cat2],
         names=['A', 'B'])
     expected = DataFrame({'values': [1, 2, 3, 4]},
@@ -297,25 +297,25 @@ def test_observed(observed):
 
     # https://github.com/pandas-dev/pandas/issues/8138
     d = {'cat':
-         pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"],
-                        ordered=True),
+         Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"],
+                     ordered=True),
          'ints': [1, 1, 2, 2],
          'val': [10, 20, 30, 40]}
-    df = pd.DataFrame(d)
+    df = DataFrame(d)
 
     # Grouping on a single column
     groups_single_key = df.groupby("cat", observed=observed)
     result = groups_single_key.mean()
 
-    exp_index = pd.CategoricalIndex(list('ab'), name="cat",
-                                    categories=list('abc'),
-                                    ordered=True)
+    exp_index = CategoricalIndex(list('ab'), name="cat",
+                                 categories=list('abc'),
+                                 ordered=True)
     expected = DataFrame({"ints": [1.5, 1.5], "val": [20., 30]},
                          index=exp_index)
     if not observed:
-        index = pd.CategoricalIndex(list('abc'), name="cat",
-                                    categories=list('abc'),
-                                    ordered=True)
+        index = CategoricalIndex(list('abc'), name="cat",
+                                 categories=list('abc'),
+                                 ordered=True)
         expected = expected.reindex(index)
 
     tm.assert_frame_equal(result, expected)
@@ -325,9 +325,9 @@ def test_observed(observed):
     result = groups_double_key.agg('mean')
     expected = DataFrame(
         {"val": [10, 30, 20, 40],
-         "cat": pd.Categorical(['a', 'a', 'b', 'b'],
-                               categories=['a', 'b', 'c'],
-                               ordered=True),
+         "cat": Categorical(['a', 'a', 'b', 'b'],
+                            categories=['a', 'b', 'c'],
+                            ordered=True),
          "ints": [1, 2, 1, 2]}).set_index(["cat", "ints"])
     if not observed:
         expected = cartesian_product_for_groupers(
@@ -348,7 +348,7 @@ def test_observed(observed):
     # with as_index
     d = {'foo': [10, 8, 4, 8, 4, 1, 1], 'bar': [10, 20, 30, 40, 50, 60, 70],
          'baz': ['d', 'c', 'e', 'a', 'a', 'd', 'c']}
-    df = pd.DataFrame(d)
+    df = DataFrame(d)
     cat = pd.cut(df['foo'], np.linspace(0, 10, 3))
     df['range'] = cat
     groups = df.groupby(['range', 'baz'], as_index=False, observed=observed)
@@ -361,7 +361,7 @@ def test_observed(observed):
 
 def test_observed_codes_remap(observed):
     d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
-    df = pd.DataFrame(d)
+    df = DataFrame(d)
     values = pd.cut(df['C1'], [1, 2, 3, 6])
     values.name = "cat"
     groups_double_key = df.groupby([values, 'C2'], observed=observed)
@@ -402,8 +402,8 @@ def test_observed_groups(observed):
     # gh-20583
     # test that we have the appropriate groups
 
-    cat = pd.Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
-    df = pd.DataFrame({'cat': cat, 'vals': [1, 2, 3]})
+    cat = Categorical(['a', 'c', 'a'], categories=['a', 'b', 'c'])
+    df = DataFrame({'cat': cat, 'vals': [1, 2, 3]})
     g = df.groupby('cat', observed=observed)
 
     result = g.groups
@@ -420,9 +420,9 @@ def test_observed_groups(observed):
 
 def test_observed_groups_with_nan(observed):
     # GH 24740
-    df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'],
-                       categories=['a', 'b', 'd']),
-                       'vals': [1, 2, 3]})
+    df = DataFrame({'cat': Categorical(['a', np.nan, 'a'],
+                    categories=['a', 'b', 'd']),
+                    'vals': [1, 2, 3]})
     g = df.groupby('cat', observed=observed)
     result = g.groups
     if observed:
@@ -436,16 +436,16 @@ def test_observed_groups_with_nan(observed):
 
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
-    s1 = pd.Categorical([np.nan, 'a', np.nan, 'a'],
-                        categories=['a', 'b', 'c'])
-    s2 = pd.Series([1, 2, 3, 4])
-    df = pd.DataFrame({'s1': s1, 's2': s2})
+    s1 = Categorical([np.nan, 'a', np.nan, 'a'],
+                     categories=['a', 'b', 'c'])
+    s2 = Series([1, 2, 3, 4])
+    df = DataFrame({'s1': s1, 's2': s2})
     result = df.groupby('s1', observed=observed).first().reset_index()
     if observed:
-        expected = DataFrame({'s1': pd.Categorical(['a'],
+        expected = DataFrame({'s1': Categorical(['a'],
                               categories=['a', 'b', 'c']), 's2': [2]})
     else:
-        expected = DataFrame({'s1': pd.Categorical(['a', 'b', 'c'],
+        expected = DataFrame({'s1': Categorical(['a', 'b', 'c'],
                               categories=['a', 'b', 'c']),
                               's2': [2, np.nan, np.nan]})
     tm.assert_frame_equal(result, expected)
@@ -460,11 +460,11 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
 
     # Build a dataframe with cat having one unobserved category ('missing'),
     # and a Series with identical values
-    label = pd.Categorical(['d', 'a', 'b', 'a', 'd', 'b'],
-                           categories=['a', 'b', 'missing', 'd'],
-                           ordered=ordered)
-    val = pd.Series(['d', 'a', 'b', 'a', 'd', 'b'])
-    df = pd.DataFrame({'label': label, 'val': val})
+    label = Categorical(['d', 'a', 'b', 'a', 'd', 'b'],
+                        categories=['a', 'b', 'missing', 'd'],
+                        ordered=ordered)
+    val = Series(['d', 'a', 'b', 'a', 'd', 'b'])
+    df = DataFrame({'label': label, 'val': val})
 
     # aggregate on the Categorical
     result = (df.groupby('label', observed=observed, sort=sort)['val']
@@ -472,8 +472,8 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
 
     # If ordering works, we expect index labels equal to aggregation results,
     # except for 'observed=False': label 'missing' has aggregation None
-    label = pd.Series(result.index.array, dtype='object')
-    aggr = pd.Series(result.array)
+    label = Series(result.index.array, dtype='object')
+    aggr = Series(result.array)
     if not observed:
         aggr[aggr.isna()] = 'missing'
     if not all(label == aggr):
@@ -556,9 +556,9 @@ def test_categorical_index():
 
 def test_describe_categorical_columns():
     # GH 11558
-    cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'],
-                               categories=['foo', 'bar', 'baz', 'qux'],
-                               ordered=True)
+    cats = CategoricalIndex(['qux', 'foo', 'baz', 'bar'],
+                            categories=['foo', 'bar', 'baz', 'qux'],
+                            ordered=True)
     df = DataFrame(np.random.randn(20, 4), columns=cats)
     result = df.groupby([1, 2, 3, 4] * 5).describe()
 
@@ -568,22 +568,22 @@ def test_describe_categorical_columns():
 
 def test_unstack_categorical():
     # GH11558 (example is taken from the original issue)
-    df = pd.DataFrame({'a': range(10),
-                       'medium': ['A', 'B'] * 5,
-                       'artist': list('XYXXY') * 2})
+    df = DataFrame({'a': range(10),
+                    'medium': ['A', 'B'] * 5,
+                    'artist': list('XYXXY') * 2})
     df['medium'] = df['medium'].astype('category')
 
     gcat = df.groupby(
         ['artist', 'medium'], observed=False)['a'].count().unstack()
     result = gcat.describe()
 
-    exp_columns = pd.CategoricalIndex(['A', 'B'], ordered=False,
-                                      name='medium')
+    exp_columns = CategoricalIndex(['A', 'B'], ordered=False,
+                                   name='medium')
     tm.assert_index_equal(result.columns, exp_columns)
     tm.assert_categorical_equal(result.columns.values, exp_columns.values)
 
     result = gcat['A'] + gcat['B']
-    expected = pd.Series([6, 4], index=pd.Index(['X', 'Y'], name='artist'))
+    expected = Series([6, 4], index=Index(['X', 'Y'], name='artist'))
     tm.assert_series_equal(result, expected)
 
 
@@ -645,22 +645,22 @@ def test_preserve_categories():
     categories = list('abc')
 
     # ordered=True
-    df = DataFrame({'A': pd.Categorical(list('ba'),
-                                        categories=categories,
-                                        ordered=True)})
-    index = pd.CategoricalIndex(categories, categories, ordered=True)
+    df = DataFrame({'A': Categorical(list('ba'),
+                                     categories=categories,
+                                     ordered=True)})
+    index = CategoricalIndex(categories, categories, ordered=True)
     tm.assert_index_equal(
         df.groupby('A', sort=True, observed=False).first().index, index)
     tm.assert_index_equal(
         df.groupby('A', sort=False, observed=False).first().index, index)
 
     # ordered=False
-    df = DataFrame({'A': pd.Categorical(list('ba'),
-                                        categories=categories,
-                                        ordered=False)})
-    sort_index = pd.CategoricalIndex(categories, categories, ordered=False)
-    nosort_index = pd.CategoricalIndex(list('bac'), list('bac'),
-                                       ordered=False)
+    df = DataFrame({'A': Categorical(list('ba'),
+                                     categories=categories,
+                                     ordered=False)})
+    sort_index = CategoricalIndex(categories, categories, ordered=False)
+    nosort_index = CategoricalIndex(list('bac'), list('bac'),
+                                    ordered=False)
     tm.assert_index_equal(
         df.groupby('A', sort=True, observed=False).first().index,
         sort_index)
@@ -858,94 +858,94 @@ def test_sort_datetimelike():
 
 def test_empty_sum():
     # https://github.com/pandas-dev/pandas/issues/18678
-    df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
-                                           categories=['a', 'b', 'c']),
-                       'B': [1, 2, 1]})
-    expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+    df = DataFrame({"A": Categorical(['a', 'a', 'b'],
+                                     categories=['a', 'b', 'c']),
+                    'B': [1, 2, 1]})
+    expected_idx = CategoricalIndex(['a', 'b', 'c'], name='A')
 
     # 0 by default
     result = df.groupby("A", observed=False).B.sum()
-    expected = pd.Series([3, 1, 0], expected_idx, name='B')
+    expected = Series([3, 1, 0], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
     # min_count=0
     result = df.groupby("A", observed=False).B.sum(min_count=0)
-    expected = pd.Series([3, 1, 0], expected_idx, name='B')
+    expected = Series([3, 1, 0], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
     # min_count=1
     result = df.groupby("A", observed=False).B.sum(min_count=1)
-    expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
+    expected = Series([3, 1, np.nan], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
     # min_count>1
     result = df.groupby("A", observed=False).B.sum(min_count=2)
-    expected = pd.Series([3, np.nan, np.nan], expected_idx, name='B')
+    expected = Series([3, np.nan, np.nan], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
 
 def test_empty_prod():
     # https://github.com/pandas-dev/pandas/issues/18678
-    df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
-                                           categories=['a', 'b', 'c']),
-                       'B': [1, 2, 1]})
+    df = DataFrame({"A": Categorical(['a', 'a', 'b'],
+                                     categories=['a', 'b', 'c']),
+                    'B': [1, 2, 1]})
 
-    expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+    expected_idx = CategoricalIndex(['a', 'b', 'c'], name='A')
 
     # 1 by default
     result = df.groupby("A", observed=False).B.prod()
-    expected = pd.Series([2, 1, 1], expected_idx, name='B')
+    expected = Series([2, 1, 1], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
     # min_count=0
     result = df.groupby("A", observed=False).B.prod(min_count=0)
-    expected = pd.Series([2, 1, 1], expected_idx, name='B')
+    expected = Series([2, 1, 1], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
     # min_count=1
     result = df.groupby("A", observed=False).B.prod(min_count=1)
-    expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
+    expected = Series([2, 1, np.nan], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
 
 
 def test_groupby_multiindex_categorical_datetime():
     # https://github.com/pandas-dev/pandas/issues/21390
 
-    df = pd.DataFrame({
-        'key1': pd.Categorical(list('abcbabcba')),
-        'key2': pd.Categorical(
+    df = DataFrame({
+        'key1': Categorical(list('abcbabcba')),
+        'key2': Categorical(
             list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3),
         'values': np.arange(9),
     })
     result = df.groupby(['key1', 'key2']).mean()
 
-    idx = pd.MultiIndex.from_product(
-        [pd.Categorical(['a', 'b', 'c']),
-         pd.Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))],
+    idx = MultiIndex.from_product(
+        [Categorical(['a', 'b', 'c']),
+         Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))],
         names=['key1', 'key2'])
-    expected = pd.DataFrame(
+    expected = DataFrame(
         {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx)
     assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize("as_index, expected", [
-    (True, pd.Series(
-        index=pd.MultiIndex.from_arrays(
-            [pd.Series([1, 1, 2], dtype='category'),
-                [1, 2, 2]], names=['a', 'b']
+    (True, Series(
+        index=MultiIndex.from_arrays(
+            [Series([1, 1, 2], dtype='category'),
+             [1, 2, 2]], names=['a', 'b']
         ),
         data=[1, 2, 3], name='x'
     )),
-    (False, pd.DataFrame({
-        'a': pd.Series([1, 1, 2], dtype='category'),
+    (False, DataFrame({
+        'a': Series([1, 1, 2], dtype='category'),
         'b': [1, 2, 2],
         'x': [1, 2, 3]
     }))
 ])
 def test_groupby_agg_observed_true_single_column(as_index, expected):
     # GH-23970
-    df = pd.DataFrame({
-        'a': pd.Series([1, 1, 2], dtype='category'),
+    df = DataFrame({
+        'a': Series([1, 1, 2], dtype='category'),
         'b': [1, 2, 2],
         'x': [1, 2, 3]
     })
@@ -958,35 +958,49 @@ def test_groupby_agg_observed_true_single_column(as_index, expected):
 
 @pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT])
 def test_shift(fill_value):
-    ct = pd.Categorical(['a', 'b', 'c', 'd'],
-                        categories=['a', 'b', 'c', 'd'], ordered=False)
-    expected = pd.Categorical([None, 'a', 'b', 'c'],
-                              categories=['a', 'b', 'c', 'd'], ordered=False)
+    ct = Categorical(['a', 'b', 'c', 'd'],
+                     categories=['a', 'b', 'c', 'd'], ordered=False)
+    expected = Categorical([None, 'a', 'b', 'c'],
+                           categories=['a', 'b', 'c', 'd'], ordered=False)
     res = ct.shift(1, fill_value=fill_value)
     assert_equal(res, expected)
 
 
 @pytest.fixture
 def df_cat(df):
+    """
+    DataFrame with multiple categorical columns and a column of integers.
+    Shortened so as not to contain all possible combinations of categories.
+    Useful for testing `observed` kwarg functionality on GroupBy objects.
+
+    Parameters
+    ----------
+    df : DataFrame
+        Non-categorical, longer DataFrame from another fixture, used to derive
+        this one.
+
+    Returns
+    -------
+    df_cat : DataFrame
+    """
     df_cat = df.copy()[:4]  # leave out some groups
     df_cat['A'] = df_cat['A'].astype('category')
     df_cat['B'] = df_cat['B'].astype('category')
-    df_cat['C'] = pd.Series([1, 2, 3, 4])
-    yield df_cat
-
-
-@pytest.mark.parametrize('operation, index', [
-    ('agg', MultiIndex.from_frame(
-        pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
-                      'B': ['one', 'two', 'one', 'three']
-                      }, dtype='category'))),
-    ('apply', MultiIndex.from_frame(
-        pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
-                      'B': ['one', 'two', 'one', 'three']
-                      })))])
-def test_seriesgroupby_observed_true(df_cat, operation, index):
+    df_cat['C'] = Series([1, 2, 3, 4])
+    df_cat = df_cat.drop(['D'], axis=1)
+    return df_cat
+
+
+@pytest.mark.parametrize('operation, kwargs', [
+    ('agg', dict(dtype='category')),
+    ('apply', dict())])
+def test_seriesgroupby_observed_true(df_cat, operation, kwargs):
     # GH 24880
-    expected = pd.Series(data=[1, 3, 2, 4], index=index, name='C')
+    index = MultiIndex.from_frame(
+        DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
+                   'B': ['one', 'two', 'one', 'three']
+                   }, **kwargs))
+    expected = Series(data=[1, 3, 2, 4], index=index, name='C')
     grouped = df_cat.groupby(['A', 'B'], observed=True)['C']
     result = getattr(grouped, operation)(sum)
     assert_series_equal(result, expected)
@@ -1001,8 +1015,8 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
          CategoricalIndex(['one', 'three', 'two'], ordered=False)],
         names=['A', 'B']).sortlevel()
 
-    expected = pd.Series(data=[2, 4, np.nan, 1, np.nan, 3],
-                         index=index, name='C')
+    expected = Series(data=[2, 4, np.nan, 1, np.nan, 3],
+                      index=index, name='C')
     grouped = df_cat.groupby(['A', 'B'], observed=observed)['C']
     result = getattr(grouped, operation)(sum)
     assert_series_equal(result, expected)
@@ -1029,7 +1043,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
      [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3])])
 def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
     # GH 24880
-    expected = pd.Series(data=data, index=index, name='C')
+    expected = Series(data=data, index=index, name='C')
     result = df_cat.groupby(['A', 'B'], observed=observed)['C'].apply(
         lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
     assert_series_equal(result, expected)