From debb02ae1830dc511e237ad2e354bc0256aaeabc Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 24 Dec 2018 16:02:43 +0000
Subject: [PATCH 01/14] GH23970 Added test-case that causes bug 23970

---
 pandas/tests/groupby/test_groupby.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 6d9f60df45ec8..e96d4a170aecc 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1694,3 +1694,15 @@ def test_groupby_agg_ohlc_non_first():
     result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc'])
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_agg_observed_lost_err():
+    # GH-23970
+    df = pd.DataFrame({
+        'a': pd.Series([1, 1, 2], dtype='category'),
+        'b': [1, 2, 2], 'x': [1, 2, 3]})
+
+    expected = df
+    result = df.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
+
+    tm.assert_frame_equal(result, expected)

From f450dc27abe1ef42f6d75ac77d3a928cdbd60f08 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 24 Dec 2018 16:12:51 +0000
Subject: [PATCH 02/14] GH 23970 Fixed source of the bug

Bug would have impacted any groupby function that relied on `observed`
if it were `True`
---
 pandas/core/groupby/generic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 33a41ab1cabc4..c5142a4ee98cc 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1336,7 +1336,8 @@ def _gotitem(self, key, ndim, subset=None):
             return DataFrameGroupBy(subset, self.grouper, selection=key,
                                     grouper=self.grouper,
                                     exclusions=self.exclusions,
-                                    as_index=self.as_index)
+                                    as_index=self.as_index,
+                                    observed=self.observed)
         elif ndim == 1:
             if subset is None:
                 subset = self.obj[key]

From 3b50900ddf83937b717d74a8a0a6fec125de6955 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 24 Dec 2018 16:15:36 +0000
Subject: [PATCH 03/14] GH23970 Added relevant docstring to whatsnew

---
 doc/source/whatsnew/v0.24.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index a2abda019812a..ec26c9a8a060c 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1560,6 +1560,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
+- Bug in :meth:`DataFrameGroupBy._gotitem` where `observed` parameter was not passed properly, causing subsequent function calls that rely on `observed` to misbehave if called with ``observed=True`` (:issue:`23970`)
 
 Reshaping
 ^^^^^^^^^

From e6e989444637dfd69be538f1200619e9aac4ef74 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 24 Dec 2018 23:08:16 +0000
Subject: [PATCH 04/14] Made requested change to docstring

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index ec26c9a8a060c..768181a46ba08 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1560,7 +1560,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
-- Bug in :meth:`DataFrameGroupBy._gotitem` where `observed` parameter was not passed properly, causing subsequent function calls that rely on `observed` to misbehave if called with ``observed=True`` (:issue:`23970`)
+- Bug in :meth:`DataFrame.groupby` did not work correctly with ``observed=True`` when aggregating a specified column (:issue:`23970`)
 
 Reshaping
 ^^^^^^^^^

From 51816390d6a27148e6ca1b43da42e18a3e56a7d7 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 24 Dec 2018 23:12:15 +0000
Subject: [PATCH 05/14] Made requested changes to tests

---
 pandas/tests/groupby/test_categorical.py | 11 +++++++++++
 pandas/tests/groupby/test_groupby.py     | 12 ------------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 264f2567e45c1..45b558ace3bc5 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -860,3 +860,14 @@ def test_groupby_multiindex_categorical_datetime():
     expected = pd.DataFrame(
         {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx)
     assert_frame_equal(result, expected)
+
+
+def test_groupby_agg_observed_true_single_column():
+    # GH-23970
+    expected = pd.DataFrame({
+        'a': pd.Series([1, 1, 2], dtype='category'),
+        'b': [1, 2, 2], 'x': [1, 2, 3]})
+
+    result = df.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e96d4a170aecc..6d9f60df45ec8 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1694,15 +1694,3 @@ def test_groupby_agg_ohlc_non_first():
     result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc'])
 
     tm.assert_frame_equal(result, expected)
-
-
-def test_groupby_agg_observed_lost_err():
-    # GH-23970
-    df = pd.DataFrame({
-        'a': pd.Series([1, 1, 2], dtype='category'),
-        'b': [1, 2, 2], 'x': [1, 2, 3]})
-
-    expected = df
-    result = df.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
-
-    tm.assert_frame_equal(result, expected)

From 4c68d417891180bc6d8cf4e4967275a88c316c93 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Tue, 25 Dec 2018 00:36:44 +0000
Subject: [PATCH 06/14] Fixed typo from incomplete find/replace

---
 pandas/tests/groupby/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 45b558ace3bc5..c0c3662d4768a 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -868,6 +868,6 @@ def test_groupby_agg_observed_true_single_column():
         'a': pd.Series([1, 1, 2], dtype='category'),
         'b': [1, 2, 2], 'x': [1, 2, 3]})
 
-    result = df.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
+    result = expected.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
 
     tm.assert_frame_equal(result, expected)

From 77cf3c6bb9579742209ce574b17b464e5685b0cc Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Tue, 25 Dec 2018 00:38:17 +0000
Subject: [PATCH 07/14] Fixed pep8 violation

---
 pandas/tests/groupby/test_categorical.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index c0c3662d4768a..d7588c80a9279 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -868,6 +868,7 @@ def test_groupby_agg_observed_true_single_column():
         'a': pd.Series([1, 1, 2], dtype='category'),
         'b': [1, 2, 2], 'x': [1, 2, 3]})
 
-    result = expected.groupby(['a', 'b'], as_index=False, observed=True)['x'].sum()
+    result = expected.groupby(['a', 'b'],
+        as_index=False, observed=True)['x'].sum()
 
     tm.assert_frame_equal(result, expected)

From 572d9c31e952ab9a29d21845b1d9502fd7e0d1ff Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Tue, 25 Dec 2018 02:07:49 +0000
Subject: [PATCH 08/14] Fixed pep's E128 violation

---
 pandas/tests/groupby/test_categorical.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index d7588c80a9279..a1b45519c577c 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -866,7 +866,9 @@ def test_groupby_agg_observed_true_single_column():
     # GH-23970
     expected = pd.DataFrame({
         'a': pd.Series([1, 1, 2], dtype='category'),
-        'b': [1, 2, 2], 'x': [1, 2, 3]})
+        'b': [1, 2, 2],
+        'x': [1, 2, 3]
+    })
 
     result = expected.groupby(['a', 'b'],
         as_index=False, observed=True)['x'].sum()

From 8793995e72f63e8d1f40240b70b66688678b76a1 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Tue, 25 Dec 2018 02:43:11 +0000
Subject: [PATCH 09/14] Fixed another possible pep violation

---
 pandas/tests/groupby/test_categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index a1b45519c577c..a3f483a8bd2ca 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -870,7 +870,7 @@ def test_groupby_agg_observed_true_single_column():
         'x': [1, 2, 3]
     })
 
-    result = expected.groupby(['a', 'b'],
-        as_index=False, observed=True)['x'].sum()
+    result = expected.groupby(
+        ['a', 'b'], as_index=False, observed=True)['x'].sum()
 
     tm.assert_frame_equal(result, expected)

From b75bc7bde9c3b3f79067691437c2f086bbd7cb9b Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Wed, 26 Dec 2018 03:42:21 +0000
Subject: [PATCH 10/14] Fixed whatsnew entryto meet in the middle

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 768181a46ba08..790cc5399b36f 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1560,7 +1560,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
-- Bug in :meth:`DataFrame.groupby` did not work correctly with ``observed=True`` when aggregating a specified column (:issue:`23970`)
+- Bug in :meth:`DataFrame.groupby` did not respect the `observed` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 
 Reshaping
 ^^^^^^^^^

From d87e159e77a515b4ae8d39634eefaaea9e6101df Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Fri, 28 Dec 2018 03:16:45 +0000
Subject: [PATCH 11/14] Test has been modified to have parameterised as_index

---
 pandas/tests/groupby/test_categorical.py | 26 +++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index a3f483a8bd2ca..a5ced921f850b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -862,15 +862,31 @@ def test_groupby_multiindex_categorical_datetime():
     assert_frame_equal(result, expected)
 
 
-def test_groupby_agg_observed_true_single_column():
+@pytest.mark.parametrize("as_index, expected", [
+    (True, pd.Series(
+        index=pd.MultiIndex.from_arrays(
+        [pd.Series([1, 1, 2], dtype='category'), 
+        [1, 2, 2]], names=['a', 'b']),
+        data=[1, 2, 3], name='x'
+    )),
+    (False, pd.DataFrame({
+        'a': pd.Series([1, 1, 2], dtype='category'),
+        'b': [1, 2, 2],
+        'x': [1, 2, 3]
+    }))
+])
+def test_groupby_agg_observed_true_single_column(as_index, expected):
     # GH-23970
-    expected = pd.DataFrame({
+    df = pd.DataFrame({
         'a': pd.Series([1, 1, 2], dtype='category'),
         'b': [1, 2, 2],
         'x': [1, 2, 3]
     })
 
-    result = expected.groupby(
-        ['a', 'b'], as_index=False, observed=True)['x'].sum()
+    result = df.groupby(
+        ['a', 'b'], as_index=as_index, observed=True)['x'].sum()
 
-    tm.assert_frame_equal(result, expected)
+    if isinstance(result, pd.DataFrame):
+        tm.assert_frame_equal(result, expected)
+    else:
+        tm.assert_series_equal(result, expected)

From b01d809efe95aa33be7c474c3640221f731f7a44 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Fri, 28 Dec 2018 03:19:28 +0000
Subject: [PATCH 12/14] Fixed pep8 issues in test file

---
 pandas/tests/groupby/test_categorical.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index a5ced921f850b..60f12ae13bef5 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -865,8 +865,9 @@ def test_groupby_multiindex_categorical_datetime():
 @pytest.mark.parametrize("as_index, expected", [
     (True, pd.Series(
         index=pd.MultiIndex.from_arrays(
-        [pd.Series([1, 1, 2], dtype='category'), 
-        [1, 2, 2]], names=['a', 'b']),
+            [pd.Series([1, 1, 2], dtype='category'),
+                [1, 2, 2]], names=['a', 'b']
+        ),
         data=[1, 2, 3], name='x'
     )),
     (False, pd.DataFrame({

From 7a6898f1ff9935435d99fcbd8f8e73ddd2970e2b Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Fri, 28 Dec 2018 03:20:30 +0000
Subject: [PATCH 13/14] Made recommended change to whatsnew

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 790cc5399b36f..bac7746879eda 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1560,7 +1560,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 - Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`)
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
-- Bug in :meth:`DataFrame.groupby` did not respect the `observed` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
+- Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 
 Reshaping
 ^^^^^^^^^

From 3e9178adf96421a03a2a20945e94670f5e334c81 Mon Sep 17 00:00:00 2001
From: Koustav-Samaddar <koustav201197@gmail.com>
Date: Mon, 31 Dec 2018 10:39:33 +0000
Subject: [PATCH 14/14] Changed test to use assert_equal

---
 pandas/tests/groupby/test_categorical.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 54df021dd7b0d..a39600d114b89 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -888,10 +888,7 @@ def test_groupby_agg_observed_true_single_column(as_index, expected):
     result = df.groupby(
         ['a', 'b'], as_index=as_index, observed=True)['x'].sum()
 
-    if isinstance(result, pd.DataFrame):
-        tm.assert_frame_equal(result, expected)
-    else:
-        tm.assert_series_equal(result, expected)
+    assert_equal(result, expected)
 
 
 @pytest.mark.parametrize('fill_value', [None, np.nan, pd.NaT])