Skip to content

Commit 1cd1026

Browse files
jcrist authored and jorisvandenbossche committed
Groupby getitem works with all index types (#13731)
Previously `df.groupby(0)[df.columns]` would fail if all column names were integers (meaning `df.columns` was an `Int64Index`). This was because the implementation of `__getitem__` in `SelectionMixin` was checking for `ABCIndex`, which matches only the base `Index` type, when it should have checked for `ABCIndexClass`, which also matches specialised index subclasses such as `Int64Index`.
1 parent 5a3b071 commit 1cd1026

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,7 @@ Bug Fixes
750750

751751
- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
752752
- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)
753+
- Bug in ``df.groupby(...)[...]`` where getitem with ``Int64Index`` raised an error (:issue:`13731`)
753754

754755
- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`)
755756
- Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`)

pandas/core/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77

88
from pandas.types.missing import isnull
9-
from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndex
9+
from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndexClass
1010
from pandas.types.common import (_ensure_object, is_object_dtype,
1111
is_list_like, is_scalar)
1212

@@ -299,7 +299,7 @@ def name(self):
299299
@property
300300
def _selection_list(self):
301301
if not isinstance(self._selection, (list, tuple, ABCSeries,
302-
ABCIndex, np.ndarray)):
302+
ABCIndexClass, np.ndarray)):
303303
return [self._selection]
304304
return self._selection
305305

@@ -330,7 +330,7 @@ def __getitem__(self, key):
330330
if self._selection is not None:
331331
raise Exception('Column(s) %s already selected' % self._selection)
332332

333-
if isinstance(key, (list, tuple, ABCSeries, ABCIndex,
333+
if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
334334
np.ndarray)):
335335
if len(self.obj.columns.intersection(key)) != len(key):
336336
bad_keys = list(set(key).difference(self.obj.columns))

pandas/tests/test_groupby.py

+16
Original file line numberDiff line numberDiff line change
@@ -3769,6 +3769,22 @@ def test_getitem_list_of_columns(self):
37693769
assert_frame_equal(result2, expected)
37703770
assert_frame_equal(result3, expected)
37713771

3772+
def test_getitem_numeric_column_names(self):
3773+
# GH #13731
3774+
df = DataFrame({0: list('abcd') * 2,
3775+
2: np.random.randn(8),
3776+
4: np.random.randn(8),
3777+
6: np.random.randn(8)})
3778+
result = df.groupby(0)[df.columns[1:3]].mean()
3779+
result2 = df.groupby(0)[2, 4].mean()
3780+
result3 = df.groupby(0)[[2, 4]].mean()
3781+
3782+
expected = df.ix[:, [0, 2, 4]].groupby(0).mean()
3783+
3784+
assert_frame_equal(result, expected)
3785+
assert_frame_equal(result2, expected)
3786+
assert_frame_equal(result3, expected)
3787+
37723788
def test_agg_multiple_functions_maintain_order(self):
37733789
# GH #610
37743790
funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)]

0 commit comments

Comments
 (0)