From debed82067c88f218cc0af7347c2b2a2bdec6916 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Sun, 12 Jul 2020 23:24:57 +0100
Subject: [PATCH 1/5] #34951 bug fixed and test added

---
 doc/source/whatsnew/v1.1.0.rst           |  1 +
 pandas/core/groupby/generic.py           |  2 +-
 pandas/tests/groupby/test_categorical.py | 23 +++++++++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 5f93e08d51baa..b56d7c2599033 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1091,6 +1091,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`)
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
+- Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise ``ValueError`` grouping on multiple ``Categoricals`` (:issue:`34951`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 1f49ee2b0b665..9acea86b14103 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1058,7 +1058,7 @@ def _cython_agg_blocks(
                     #  reductions; see GH#28949
                     obj = obj.iloc[:, 0]
 
-                s = get_groupby(obj, self.grouper)
+                s = get_groupby(obj, self.grouper, observed=True)
                 try:
                     result = s.aggregate(lambda x: alt(x, axis=self.axis))
                 except TypeError:
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 118d928ac02f4..b51f84618e977 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1669,3 +1669,26 @@ def test_categorical_transform():
     expected["status"] = expected["status"].astype(delivery_status_type)
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("func", ["first", "last"])
+def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
+
+    cat = pd.Categorical([0, 0, 1, 1])
+    val = [0, 1, 1, 0]
+    df = pd.DataFrame({"a": cat, "b": cat, "c": val})
+
+    idx = pd.Categorical([0, 1])
+    idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"])
+    expected = {
+        "first": pd.Series([0, np.NaN, np.NaN, 1], idx, name="c"),
+        "last": pd.Series([1, np.NaN, np.NaN, 0], idx, name="c"),
+    }
+
+    df_grp = df.groupby(["a", "b"])
+    df_res = getattr(df_grp, func)()
+    tm.assert_frame_equal(df_res, expected[func].to_frame())
+
+    srs_grp = df_grp["c"]
+    srs_res = getattr(srs_grp, func)()
+    tm.assert_series_equal(srs_res, expected[func])

From b6903c6df39b36d857ffbae56646485b60cd5a7f Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 13 Jul 2020 00:06:06 +0100
Subject: [PATCH 2/5] issue number in test

---
 pandas/tests/groupby/test_categorical.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index b51f84618e977..b5d77a04da63d 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1673,6 +1673,7 @@ def test_categorical_transform():
 
 @pytest.mark.parametrize("func", ["first", "last"])
 def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
+    # GH 34951
 
     cat = pd.Categorical([0, 0, 1, 1])
     val = [0, 1, 1, 0]
@@ -1684,6 +1685,7 @@ def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
         "first": pd.Series([0, np.NaN, np.NaN, 1], idx, name="c"),
         "last": pd.Series([1, np.NaN, np.NaN, 0], idx, name="c"),
     }
+    import pdb; pdb.set_trace()
 
     df_grp = df.groupby(["a", "b"])
     df_res = getattr(df_grp, func)()

From fe2a8ba70c0d0889586809ae47cedd58ccfab639 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 13 Jul 2020 00:14:20 +0100
Subject: [PATCH 3/5] removed errant pdb.set_trace()

---
 pandas/tests/groupby/test_categorical.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index b5d77a04da63d..a87de65800008 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1685,7 +1685,6 @@ def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
         "first": pd.Series([0, np.NaN, np.NaN, 1], idx, name="c"),
         "last": pd.Series([1, np.NaN, np.NaN, 0], idx, name="c"),
     }
-    import pdb; pdb.set_trace()
 
     df_grp = df.groupby(["a", "b"])
     df_res = getattr(df_grp, func)()

From 7bbb3be8a27608cf4f530e10a7a7310b74015fc1 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 13 Jul 2020 20:43:30 +0100
Subject: [PATCH 4/5] addressing comments

---
 doc/source/whatsnew/v1.1.0.rst           |  2 +-
 pandas/core/groupby/generic.py           |  4 ++++
 pandas/tests/groupby/test_categorical.py | 25 ++++++++++++++++--------
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b56d7c2599033..85b29a58a1f15 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1091,7 +1091,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`)
 - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`)
 - Bug in :meth:`core.groupby.DataFrameGroupBy.transform` when ``func='nunique'`` and columns are of type ``datetime64``, the result would also be of type ``datetime64`` instead of ``int64`` (:issue:`35109`)
-- Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise ``ValueError`` grouping on multiple ``Categoricals`` (:issue:`34951`)
+- Bug in :meth:'DataFrameGroupBy.first' and :meth:'DataFrameGroupBy.last' that would raise an unnecessary ``ValueError`` when grouping on multiple ``Categoricals`` (:issue:`34951`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9acea86b14103..093e1d4ab3942 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1058,6 +1058,10 @@ def _cython_agg_blocks(
                     #  reductions; see GH#28949
                     obj = obj.iloc[:, 0]
 
+                # Create SeriesGroupBy with observed=True so that it does
+                # not try to add missing categories if grouping over multiple
+                # Categoricals. This will done by later self._reindex_output()
+                # Doing it here creates an error. See GH#34951
                 s = get_groupby(obj, self.grouper, observed=True)
                 try:
                     result = s.aggregate(lambda x: alt(x, axis=self.axis))
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index a87de65800008..f2be125632ad7 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1672,7 +1672,9 @@ def test_categorical_transform():
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
+def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(
+    func: str, observed: bool
+):
     # GH 34951
 
     cat = pd.Categorical([0, 0, 1, 1])
@@ -1681,15 +1683,22 @@ def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(func: str):
 
     idx = pd.Categorical([0, 1])
     idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"])
-    expected = {
+    expected_dict = {
         "first": pd.Series([0, np.NaN, np.NaN, 1], idx, name="c"),
         "last": pd.Series([1, np.NaN, np.NaN, 0], idx, name="c"),
     }
 
-    df_grp = df.groupby(["a", "b"])
-    df_res = getattr(df_grp, func)()
-    tm.assert_frame_equal(df_res, expected[func].to_frame())
+    expected = expected_dict[func]
+    if observed:
+        expected = expected.dropna().astype(np.int64)
+
+    # Check SeriesGroupBy
+    srs_grp = df.groupby(["a", "b"], observed=observed)["c"]
+    result = getattr(srs_grp, func)()
+    tm.assert_series_equal(result, expected)
 
-    srs_grp = df_grp["c"]
-    srs_res = getattr(srs_grp, func)()
-    tm.assert_series_equal(srs_res, expected[func])
+    # Check DataFrameGroupBy
+    df_grp = df.groupby(["a", "b"], observed=observed)
+    result = getattr(df_grp, func)()
+    expected = expected.to_frame()
+    tm.assert_frame_equal(result, expected)

From d098586b1ca08cd628ab39f0c3ccfd2fc1420fe6 Mon Sep 17 00:00:00 2001
From: smithto1 <thomassmith0304@gmail.com>
Date: Mon, 13 Jul 2020 21:18:20 +0100
Subject: [PATCH 5/5] Typing Validation failed. Split new test into separate
 tests for SeriesGroupBy and DataFrameGroupBy

---
 pandas/tests/groupby/test_categorical.py | 27 +++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index f2be125632ad7..7e4513da37dc9 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1672,11 +1672,10 @@ def test_categorical_transform():
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(
+def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
     # GH 34951
-
     cat = pd.Categorical([0, 0, 1, 1])
     val = [0, 1, 1, 0]
     df = pd.DataFrame({"a": cat, "b": cat, "c": val})
@@ -1692,13 +1691,31 @@ def test_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     if observed:
         expected = expected.dropna().astype(np.int64)
 
-    # Check SeriesGroupBy
     srs_grp = df.groupby(["a", "b"], observed=observed)["c"]
     result = getattr(srs_grp, func)()
     tm.assert_series_equal(result, expected)
 
-    # Check DataFrameGroupBy
+
+@pytest.mark.parametrize("func", ["first", "last"])
+def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
+    func: str, observed: bool
+):
+    # GH 34951
+    cat = pd.Categorical([0, 0, 1, 1])
+    val = [0, 1, 1, 0]
+    df = pd.DataFrame({"a": cat, "b": cat, "c": val})
+
+    idx = pd.Categorical([0, 1])
+    idx = pd.MultiIndex.from_product([idx, idx], names=["a", "b"])
+    expected_dict = {
+        "first": pd.Series([0, np.NaN, np.NaN, 1], idx, name="c"),
+        "last": pd.Series([1, np.NaN, np.NaN, 0], idx, name="c"),
+    }
+
+    expected = expected_dict[func].to_frame()
+    if observed:
+        expected = expected.dropna().astype(np.int64)
+
     df_grp = df.groupby(["a", "b"], observed=observed)
     result = getattr(df_grp, func)()
-    expected = expected.to_frame()
     tm.assert_frame_equal(result, expected)