diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 6263d5337f42f..a509acb3604e1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -99,6 +99,13 @@ class Grouper: .. versionadded:: 1.1.0 + dropna : bool, default True + If True, and if group keys contain NA values, the NA values together with + their row/column will be dropped. If False, NA values will also be treated as + a key in groups. + + .. versionadded:: 1.2.0 + Returns ------- A specification for a groupby instruction @@ -820,7 +827,9 @@ def is_in_obj(gpr) -> bool: groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) # create the internals grouper - grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) + grouper = ops.BaseGrouper( + group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + ) return grouper, exclusions, obj diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b3f91d4623c84..17539cdf451e3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -87,6 +87,7 @@ def __init__( group_keys: bool = True, mutated: bool = False, indexer: Optional[np.ndarray] = None, + dropna: bool = True, ): assert isinstance(axis, Index), axis @@ -97,6 +98,7 @@ def __init__( self.group_keys = group_keys self.mutated = mutated self.indexer = indexer + self.dropna = dropna @property def groupings(self) -> List["grouper.Grouping"]: diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index deb73acbb158a..cd6c17955c18d 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -162,6 +162,14 @@ def test_groupby_dropna_series_by(dropna, expected): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dropna", (False, True)) +def test_grouper_dropna_propagation(dropna): + # GH 36604 + df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]}) + gb = 
df.groupby("A", dropna=dropna) + assert gb.grouper.dropna == dropna + + @pytest.mark.parametrize( "dropna,df_expected,s_expected", [