Skip to content

Commit 134dd1f

Browse files
committed
BUG: handle as_index=False for pseudo multi-groupers (e.g. .describe())
1 parent f520e8d commit 134dd1f

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

doc/source/v0.14.0.txt

+7
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ API changes
126126
g.count()
127127
g.describe()
128128

129+
passing ``as_index=False`` will leave the grouped column in-place (this is not a change in 0.14.0)
130+
131+
df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B'])
132+
g = df.groupby('A',as_index=False)
133+
g.count()
134+
g.describe()
135+
129136
- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
130137
by a Time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)
131138

pandas/core/groupby.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def _selected_obj(self):
456456
def _set_selection_from_grouper(self):
457457
""" we may need create a selection if we have non-level groupers """
458458
grp = self.grouper
459-
if self._selection is None and getattr(grp,'groupings',None) is not None:
459+
if self._selection is None and self.as_index and getattr(grp,'groupings',None) is not None:
460460
ax = self.obj._info_axis
461461
groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
462462
if len(groupers):
@@ -1029,12 +1029,23 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
10291029
result = result.reindex(ax)
10301030
else:
10311031
result = result.reindex_axis(ax, axis=self.axis)
1032-
elif self.group_keys and self.as_index:
1033-
group_keys = keys
1034-
group_levels = self.grouper.levels
1035-
group_names = self.grouper.names
1036-
result = concat(values, axis=self.axis, keys=group_keys,
1037-
levels=group_levels, names=group_names)
1032+
1033+
elif self.group_keys:
1034+
1035+
if self.as_index:
1036+
1037+
# possible MI return case
1038+
group_keys = keys
1039+
group_levels = self.grouper.levels
1040+
group_names = self.grouper.names
1041+
result = concat(values, axis=self.axis, keys=group_keys,
1042+
levels=group_levels, names=group_names)
1043+
else:
1044+
1045+
# GH5610, returns a MI, with the first level being a
1046+
# range index
1047+
keys = list(range(len(values)))
1048+
result = concat(values, axis=self.axis, keys=keys)
10381049
else:
10391050
result = concat(values, axis=self.axis)
10401051

@@ -2528,6 +2539,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
25282539
elif hasattr(self.grouper, 'groupings'):
25292540
if len(self.grouper.groupings) > 1:
25302541
key_index = MultiIndex.from_tuples(keys, names=key_names)
2542+
25312543
else:
25322544
ping = self.grouper.groupings[0]
25332545
if len(keys) == ping.ngroups:
@@ -2540,8 +2552,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
25402552
# reorder the values
25412553
values = [values[i] for i in indexer]
25422554
else:
2555+
25432556
key_index = Index(keys, name=key_names[0])
25442557

2558+
# don't use the key indexer
2559+
if not self.as_index:
2560+
key_index = None
2561+
25452562
# make Nones an empty object
25462563
if com._count_not_none(*values) != len(values):
25472564
v = next(v for v in values if v is not None)
@@ -2611,7 +2628,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
26112628

26122629
# normally use vstack as its faster than concat
26132630
# and if we have mi-columns
2614-
if not _np_version_under1p7 or isinstance(v.index,MultiIndex):
2631+
if not _np_version_under1p7 or isinstance(v.index,MultiIndex) or key_index is None:
26152632
stacked_values = np.vstack([np.asarray(x) for x in values])
26162633
result = DataFrame(stacked_values,index=key_index,columns=index)
26172634
else:

pandas/tests/test_groupby.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1378,7 +1378,8 @@ def test_groupby_as_index_apply(self):
13781378
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
13791379

13801380
# apply doesn't maintain the original ordering
1381-
exp_not_as_apply = Index([0, 2, 1, 4])
1381+
# changed in GH5610 as the as_index=False returns a MI here
1382+
exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)])
13821383
exp_as_apply = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)])
13831384

13841385
assert_index_equal(res_as_apply, exp_as_apply)
@@ -1994,19 +1995,28 @@ def test_non_cython_api(self):
19941995

19951996
df = DataFrame([[1, 2, 'foo'], [1, nan, 'bar',], [3, nan, 'baz']], columns=['A', 'B','C'])
19961997
g = df.groupby('A')
1998+
gni = df.groupby('A',as_index=False)
19971999

19982000
# mad
19992001
expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3])
20002002
expected.index.name = 'A'
20012003
result = g.mad()
20022004
assert_frame_equal(result,expected)
20032005

2006+
expected = DataFrame([[0.,0.],[0,nan]],columns=['A','B'],index=[0,1])
2007+
result = gni.mad()
2008+
assert_frame_equal(result,expected)
2009+
20042010
# describe
20052011
expected = DataFrame(dict(B = concat([df.loc[[0,1],'B'].describe(),df.loc[[2],'B'].describe()],keys=[1,3])))
20062012
expected.index.names = ['A',None]
20072013
result = g.describe()
20082014
assert_frame_equal(result,expected)
20092015

2016+
expected = concat([df.loc[[0,1],['A','B']].describe(),df.loc[[2],['A','B']].describe()],keys=[0,1])
2017+
result = gni.describe()
2018+
assert_frame_equal(result,expected)
2019+
20102020
# any
20112021
expected = DataFrame([[True, True],[False, True]],columns=['B','C'],index=[1,3])
20122022
expected.index.name = 'A'

0 commit comments

Comments
 (0)