GroupBy.filter: handle the case where no group passes the filter (GH12768).

When the list of per-group index arrays is empty, _apply_filter now builds an
empty *integer* ndarray instead of a plain Python list, so the rest of the
method receives the same kind of object (an integer index array) that the
non-empty branch produces via np.sort(np.concatenate(indices)).

Two regression tests cover the "every group filtered out" case for both
dropna=False and dropna=True, and the fix is recorded in the 0.18.1 whatsnew.

NOTE(review): the "index" blob hashes below are carried over from the original
patch and do not reflect the dtype/comment tweaks made in this revision.

diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index f20b961455ba7..f6873cca02dbe 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -211,3 +211,4 @@ Bug Fixes
 - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
 - Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
 - ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
+- Bug in ``GroupBy.filter`` when ``dropna=False`` and no groups fulfilled the criteria (:issue:`12768`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 066afc55e442f..cde7c40054847 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -820,7 +820,7 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
 
     def _apply_filter(self, indices, dropna):
         if len(indices) == 0:
-            indices = []
+            indices = np.array([], dtype='int64')
         else:
             indices = np.sort(np.concatenate(indices))
         if dropna:
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 28038e02b64ca..bfad309b6cd50 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -4990,6 +4990,22 @@ def test_filter_out_no_groups(self):
         filtered = grouped.filter(lambda x: x['A'].mean() > 0)
         assert_frame_equal(filtered, df)
 
+    def test_filter_out_all_groups_in_df_dropna_false(self):
+        # GH12768: dropna=False keeps the shape and NaN-fills every row
+        df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
+        res = df.groupby('a')
+        res = res.filter(lambda x: x['b'].sum() > 5, dropna=False)
+        expected = pd.DataFrame({'a': [nan] * 3, 'b': [nan] * 3})
+        assert_frame_equal(expected, res)
+
+    def test_filter_out_all_groups_in_df_dropna_true(self):
+        # GH12768: dropna=True yields an empty frame with the same columns
+        df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
+        res = df.groupby('a')
+        res = res.filter(lambda x: x['b'].sum() > 5, dropna=True)
+        expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64")
+        assert_frame_equal(expected, res)
+
     def test_filter_condition_raises(self):
         def raise_if_sum_is_zero(x):
             if x.sum() == 0: