From f0b500dd226aba364b0d5340ccc0ebe954de6b38 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 24 Sep 2020 15:23:44 +0000 Subject: [PATCH 1/8] add dropna attribute to BaseGrouper --- pandas/core/groupby/ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b3f91d4623c84..17539cdf451e3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -87,6 +87,7 @@ def __init__( group_keys: bool = True, mutated: bool = False, indexer: Optional[np.ndarray] = None, + dropna: bool = True, ): assert isinstance(axis, Index), axis @@ -97,6 +98,7 @@ def __init__( self.group_keys = group_keys self.mutated = mutated self.indexer = indexer + self.dropna = dropna @property def groupings(self) -> List["grouper.Grouping"]: From c91391661fdf8815bd188492edc4371ca27e2a89 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 24 Sep 2020 15:25:03 +0000 Subject: [PATCH 2/8] add tests --- pandas/tests/groupby/test_groupby_dropna.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index deb73acbb158a..5ee94b63ba82d 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -162,6 +162,13 @@ def test_groupby_dropna_series_by(dropna, expected): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dropna", (False, True)) +def test_Grouper_dropna_propagation(dropna): + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) + gb = df.groupby("A", dropna=dropna) + assert gb.grouper.dropna is dropna + + @pytest.mark.parametrize( "dropna,df_expected,s_expected", [ From ff6e01305e4c1a2eb75a79507c2c4ae99953bc0d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 24 Sep 2020 15:54:47 +0000 Subject: [PATCH 3/8] propagate dropna in get_grouper --- pandas/core/groupby/grouper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 6263d5337f42f..b943323062982 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -820,7 +820,9 @@ def is_in_obj(gpr) -> bool: groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) # create the internals grouper - grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) + grouper = ops.BaseGrouper( + group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + ) return grouper, exclusions, obj From d0cdc36071e310278023968712deaf0140eddce9 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 01:29:48 +0000 Subject: [PATCH 4/8] feedback --- pandas/tests/groupby/test_groupby_dropna.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 5ee94b63ba82d..f8c588be5a294 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -163,10 +163,10 @@ def test_groupby_dropna_series_by(dropna, expected): @pytest.mark.parametrize("dropna", (False, True)) -def test_Grouper_dropna_propagation(dropna): +def test_grouper_dropna_propagation(dropna): df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) gb = df.groupby("A", dropna=dropna) - assert gb.grouper.dropna is dropna + assert gb.grouper.dropna == dropna @pytest.mark.parametrize( From 8e3c9fa8c2174bd7f9854358065e449ffa4fee05 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 01:54:52 +0000 Subject: [PATCH 5/8] update Grouper docstring --- pandas/core/groupby/grouper.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b943323062982..a509acb3604e1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -99,6 +99,13 @@ class Grouper: ..
versionadded:: 1.1.0 + dropna : bool, default True + If True, and if group keys contain NA values, NA values together with + row/column will be dropped. If False, NA values will also be treated as + the key in groups. + + .. versionadded:: 1.2.0 + Returns ------- A specification for a groupby instruction From 38a74ac0e0db96898ee11c23ccc6a43241c0cd07 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 02:05:35 +0000 Subject: [PATCH 6/8] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2a8b6fe3ade6a..1fbb488f2f11f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -349,6 +349,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) +- Bug in :class:`pd.Grouper` now propagates ``dropna`` argument correctly (:issue:`36620`) Reshaping ^^^^^^^^^ From 85f82b75bf2cd618da970b35caf3c6e9a8ae2e2d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 16:44:57 +0000 Subject: [PATCH 7/8] remove whatsnew note --- doc/source/whatsnew/v1.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1fbb488f2f11f..2a8b6fe3ade6a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -349,7 +349,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with 
``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) -- Bug in :class:`pd.Grouper` now propagates ``dropna`` argument correctly (:issue:`36620`) Reshaping ^^^^^^^^^ From 61f0553b38b1df2f99fd7d14643e2bace6c69889 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 16:51:13 +0000 Subject: [PATCH 8/8] added PR number to test --- pandas/tests/groupby/test_groupby_dropna.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index f8c588be5a294..cd6c17955c18d 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -164,6 +164,7 @@ def test_groupby_dropna_series_by(dropna, expected): @pytest.mark.parametrize("dropna", (False, True)) def test_grouper_dropna_propagation(dropna): + # GH 36604 df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) gb = df.groupby("A", dropna=dropna) assert gb.grouper.dropna == dropna