Skip to content

Commit 64977f1

Browse files
jonaslb authored and jreback committed
BUG: Filtering a groupby object with dropna=False and no groups fulfilling the requirement.
closes #12768 closes #12776
1 parent 2d13410 commit 64977f1

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ Bug Fixes
185185
- Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`)
186186
- Bugs in concatenation with a coercable dtype was too aggressive. (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`)
187187
- Bug in ``float_format`` option with option not being validated as a callable. (:issue:`12706`)
188+
- Bug in ``GroupBy.filter`` when ``dropna=False`` and no groups fulfilled the criteria (:issue:`12768`)
188189

189190

190191

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
820820

821821
def _apply_filter(self, indices, dropna):
822822
if len(indices) == 0:
823-
indices = []
823+
indices = np.array([], dtype='int64')
824824
else:
825825
indices = np.sort(np.concatenate(indices))
826826
if dropna:

pandas/tests/test_groupby.py

+14
Original file line numberDiff line numberDiff line change
@@ -4990,6 +4990,20 @@ def test_filter_out_no_groups(self):
49904990
filtered = grouped.filter(lambda x: x['A'].mean() > 0)
49914991
assert_frame_equal(filtered, df)
49924992

4993+
def test_filter_out_all_groups_in_df(self):
4994+
# GH12768
4995+
df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
4996+
res = df.groupby('a')
4997+
res = res.filter(lambda x: x['b'].sum() > 5, dropna=False)
4998+
expected = pd.DataFrame({'a': [nan] * 3, 'b': [nan] * 3})
4999+
assert_frame_equal(expected, res)
5000+
5001+
df = pd.DataFrame({'a': [1, 1, 2], 'b': [1, 2, 0]})
5002+
res = df.groupby('a')
5003+
res = res.filter(lambda x: x['b'].sum() > 5, dropna=True)
5004+
expected = pd.DataFrame({'a': [], 'b': []}, dtype="int64")
5005+
assert_frame_equal(expected, res)
5006+
49935007
def test_filter_condition_raises(self):
49945008
def raise_if_sum_is_zero(x):
49955009
if x.sum() == 0:

0 commit comments

Comments
 (0)