Skip to content

Commit ba5106e

Browse files
terrytangyuan authored and jreback committed
BUG: Fixed bug where groupby() with axis=1 followed by filter() throws IndexError, #11041
1 parent 4c9bcf1 commit ba5106e

File tree

3 files changed

+10
-12
lines changed

3 files changed

+10
-12
lines changed

doc/source/whatsnew/v0.17.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1021,7 +1021,7 @@ Bug Fixes
10211021
key (:issue:`10385`).
10221022

10231023
- Bug in ``groupby(sort=False)`` with datetime-like ``Categorical`` raises ``ValueError`` (:issue:`10505`)
1024-
1024+
- Bug in ``groupby(axis=1)`` with ``filter()`` throws ``IndexError`` (:issue:`11041`)
10251025
- Bug in ``test_categorical`` on big-endian builds (:issue:`10425`)
10261026
- Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`)
10271027
- Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`)

pandas/core/groupby.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1229,7 +1229,7 @@ def _apply_filter(self, indices, dropna):
12291229
else:
12301230
indices = np.sort(np.concatenate(indices))
12311231
if dropna:
1232-
filtered = self._selected_obj.take(indices)
1232+
filtered = self._selected_obj.take(indices, axis=self.axis)
12331233
else:
12341234
mask = np.empty(len(self._selected_obj.index), dtype=bool)
12351235
mask.fill(False)

pandas/tests/test_groupby.py

+8-10
Original file line number | Diff line number | Diff line change
@@ -494,7 +494,6 @@ def test_groupby_dict_mapping(self):
494494
assert_series_equal(result, expected2)
495495

496496
def test_groupby_bounds_check(self):
497-
import pandas as pd
498497
# groupby_X is code-generated, so if one variant
499498
# does, the rest probably do too
500499
a = np.array([1,2],dtype='object')
@@ -3979,7 +3978,6 @@ def test_groupby_datetime64_32_bit(self):
39793978
assert_series_equal(result,expected)
39803979

39813980
def test_groupby_categorical_unequal_len(self):
3982-
import pandas as pd
39833981
#GH3011
39843982
series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
39853983
# The raise only happens with a Categorical, not with a Series of dtype category
@@ -4037,7 +4035,6 @@ def noddy(value, weight):
40374035
no_toes = df_grouped.apply(lambda x: noddy(x.value, x.weight ))
40384036

40394037
def test_groupby_with_empty(self):
4040-
import pandas as pd
40414038
index = pd.DatetimeIndex(())
40424039
data = ()
40434040
series = pd.Series(data, index)
@@ -4376,7 +4373,6 @@ def test_cumcount_groupby_not_col(self):
43764373
assert_series_equal(expected, sg.cumcount())
43774374

43784375
def test_filter_series(self):
4379-
import pandas as pd
43804376
s = pd.Series([1, 3, 20, 5, 22, 24, 7])
43814377
expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6])
43824378
expected_even = pd.Series([20, 22, 24], index=[2, 4, 5])
@@ -4395,7 +4391,6 @@ def test_filter_series(self):
43954391
expected_even.reindex(s.index))
43964392

43974393
def test_filter_single_column_df(self):
4398-
import pandas as pd
43994394
df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7])
44004395
expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6])
44014396
expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5])
@@ -4414,7 +4409,6 @@ def test_filter_single_column_df(self):
44144409
expected_even.reindex(df.index))
44154410

44164411
def test_filter_multi_column_df(self):
4417-
import pandas as pd
44184412
df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]})
44194413
grouper = df['A'].apply(lambda x: x % 2)
44204414
grouped = df.groupby(grouper)
@@ -4423,7 +4417,6 @@ def test_filter_multi_column_df(self):
44234417
grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), expected)
44244418

44254419
def test_filter_mixed_df(self):
4426-
import pandas as pd
44274420
df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()})
44284421
grouper = df['A'].apply(lambda x: x % 2)
44294422
grouped = df.groupby(grouper)
@@ -4433,7 +4426,6 @@ def test_filter_mixed_df(self):
44334426
grouped.filter(lambda x: x['A'].sum() > 10), expected)
44344427

44354428
def test_filter_out_all_groups(self):
4436-
import pandas as pd
44374429
s = pd.Series([1, 3, 20, 5, 22, 24, 7])
44384430
grouper = s.apply(lambda x: x % 2)
44394431
grouped = s.groupby(grouper)
@@ -4446,7 +4438,6 @@ def test_filter_out_all_groups(self):
44464438
grouped.filter(lambda x: x['A'].sum() > 1000), df.ix[[]])
44474439

44484440
def test_filter_out_no_groups(self):
4449-
import pandas as pd
44504441
s = pd.Series([1, 3, 20, 5, 22, 24, 7])
44514442
grouper = s.apply(lambda x: x % 2)
44524443
grouped = s.groupby(grouper)
@@ -4459,7 +4450,6 @@ def test_filter_out_no_groups(self):
44594450
assert_frame_equal(filtered, df)
44604451

44614452
def test_filter_condition_raises(self):
4462-
import pandas as pd
44634453
def raise_if_sum_is_zero(x):
44644454
if x.sum() == 0:
44654455
raise ValueError
@@ -4471,6 +4461,14 @@ def raise_if_sum_is_zero(x):
44714461
self.assertRaises(TypeError,
44724462
lambda: grouped.filter(raise_if_sum_is_zero))
44734463

4464+
def test_filter_with_axis_in_groupby(self):
4465+
# issue 11041
4466+
index = pd.MultiIndex.from_product([range(10), [0, 1]])
4467+
data = pd.DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype='int64')
4468+
result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10)
4469+
expected = data.iloc[:,12:20]
4470+
assert_frame_equal(result, expected)
4471+
44744472
def test_filter_bad_shapes(self):
44754473
df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)})
44764474
s = df['B']

0 commit comments

Comments
 (0)