Skip to content

Commit 65abb6b

Browse files
committed
BUG: Add reduce_if_possible keyword to groupby to allow reduction from
DataFrame -> Series if groups are unique. Regression from 0.10.1, partial revert on (GH2893_) with (GH3596_) CLN: renamed reduce_if_possible -> squeeze DOC: added v0.11.1 example
1 parent f34de9e commit 65abb6b

File tree

5 files changed

+57
-14
lines changed

5 files changed

+57
-14
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ pandas 0.11.1
6565
``timedelta64[ns]`` to ``object/int`` (GH3425_)
6666
- Do not allow datetimelike/timedeltalike creation except with valid types
6767
(e.g. cannot pass ``datetime64[ms]``) (GH3423_)
68+
- Add ``squeeze`` keyword to ``groupby`` to allow reduction from
69+
DataFrame -> Series if groups are unique. Regression from 0.10.1,
70+
partial revert on (GH2893_) with (GH3596_)
6871

6972
**Bug Fixes**
7073

@@ -161,6 +164,7 @@ pandas 0.11.1
161164
.. _GH3594: https://github.com/pydata/pandas/issues/3594
162165
.. _GH3590: https://github.com/pydata/pandas/issues/3590
163166
.. _GH3610: https://github.com/pydata/pandas/issues/3610
167+
.. _GH3596: https://github.com/pydata/pandas/issues/3596
164168
.. _GH3435: https://github.com/pydata/pandas/issues/3435
165169

166170

doc/source/v0.11.1.txt

+22
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,26 @@ API changes
2121
p / p
2222
p / 0
2323

24+
- Add ``squeeze`` keyword to ``groupby`` to allow reduction from
25+
DataFrame -> Series if groups are unique. This is a regression from 0.10.1.
26+
We are reverting back to the prior behavior. This means groupby will return the
27+
same-shaped objects whether the groups are unique or not. Partial revert of (GH2893_)
28+
with (GH3596_).
29+
30+
.. ipython:: python
31+
32+
df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19},
33+
{"val1":1, "val2": 27}, {"val1":1, "val2": 12}])
34+
def func(dataf):
35+
return dataf["val2"] - dataf["val2"].mean()
36+
37+
# squeezing the result frame to a series (because we have unique groups)
38+
df2.groupby("val1", squeeze=True).apply(func)
39+
40+
# no squeezing (the default, and behavior in 0.10.1)
41+
df2.groupby("val1").apply(func)
42+
43+
2444
Enhancements
2545
~~~~~~~~~~~~
2646
- ``pd.read_html()`` can now parse HTML string, files or urls and return dataframes
@@ -44,5 +64,7 @@ on GitHub for a complete list.
4464
.. _GH3477: https://github.com/pydata/pandas/issues/3477
4565
.. _GH3492: https://github.com/pydata/pandas/issues/3492
4666
.. _GH3499: https://github.com/pydata/pandas/issues/3499
67+
.. _GH2893: https://github.com/pydata/pandas/issues/2893
68+
.. _GH3596: https://github.com/pydata/pandas/issues/3596
4769
.. _GH3590: https://github.com/pydata/pandas/issues/3590
4870
.. _GH3435: https://github.com/pydata/pandas/issues/3435

pandas/core/generic.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def get(self, key, default=None):
107107
return default
108108

109109
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
110-
group_keys=True):
110+
group_keys=True, squeeze=False):
111111
"""
112112
Group series using mapper (dict or key function, apply given function
113113
to group, return result as series) or by a series of columns
@@ -131,6 +131,9 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
131131
Sort group keys. Get better performance by turning this off
132132
group_keys : boolean, default True
133133
When calling apply, add group keys to index to identify pieces
134+
squeeze : boolean, default False
135+
reduce the dimensionality of the return type if possible, otherwise
136+
return a consistent type
134137
135138
Examples
136139
--------
@@ -150,7 +153,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
150153
from pandas.core.groupby import groupby
151154
axis = self._get_axis_number(axis)
152155
return groupby(self, by, axis=axis, level=level, as_index=as_index,
153-
sort=sort, group_keys=group_keys)
156+
sort=sort, group_keys=group_keys,
157+
squeeze=squeeze)
154158

155159
def asfreq(self, freq, method=None, how=None, normalize=False):
156160
"""

pandas/core/groupby.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ class GroupBy(object):
169169

170170
def __init__(self, obj, keys=None, axis=0, level=None,
171171
grouper=None, exclusions=None, selection=None, as_index=True,
172-
sort=True, group_keys=True):
172+
sort=True, group_keys=True, squeeze=False):
173173
self._selection = selection
174174

175175
if isinstance(obj, NDFrame):
@@ -189,6 +189,7 @@ def __init__(self, obj, keys=None, axis=0, level=None,
189189
self.keys = keys
190190
self.sort = sort
191191
self.group_keys = group_keys
192+
self.squeeze = squeeze
192193

193194
if grouper is None:
194195
grouper, exclusions = _get_grouper(obj, keys, axis=axis,
@@ -1841,15 +1842,22 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
18411842
all_indexed_same = _all_indexes_same([x.index for x in values])
18421843
singular_series = len(values) == 1 and applied_index.nlevels == 1
18431844

1844-
# assign the name to this series
1845-
if singular_series:
1846-
values[0].name = keys[0]
1845+
# GH3596
1846+
# provide a reduction (Frame -> Series) if groups are unique
1847+
if self.squeeze:
18471848

1848-
# GH2893
1849-
# we have series in the values array, we want to produce a series:
1850-
# if any of the sub-series are not indexed the same
1851-
# OR we don't have a multi-index and we have only a single values
1852-
if singular_series or not all_indexed_same:
1849+
# assign the name to this series
1850+
if singular_series:
1851+
values[0].name = keys[0]
1852+
1853+
# GH2893
1854+
# we have series in the values array, we want to produce a series:
1855+
# if any of the sub-series are not indexed the same
1856+
# OR we don't have a multi-index and we have only a single value
1857+
return self._concat_objects(keys, values,
1858+
not_indexed_same=not_indexed_same)
1859+
1860+
if not all_indexed_same:
18531861
return self._concat_objects(keys, values,
18541862
not_indexed_same=not_indexed_same)
18551863

pandas/tests/test_groupby.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -263,24 +263,29 @@ def test_groupby_nonobject_dtype(self):
263263

264264
def test_groupby_return_type(self):
265265

266-
# GH2893
266+
# GH2893, return a reduced type
267267
df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19},
268268
{"val1":2, "val2": 27}, {"val1":2, "val2": 12}])
269269

270270
def func(dataf):
271271
return dataf["val2"] - dataf["val2"].mean()
272272

273-
result = df1.groupby("val1").apply(func)
273+
result = df1.groupby("val1", squeeze=True).apply(func)
274274
self.assert_(isinstance(result,Series))
275275

276276
df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19},
277277
{"val1":1, "val2": 27}, {"val1":1, "val2": 12}])
278278
def func(dataf):
279279
return dataf["val2"] - dataf["val2"].mean()
280280

281-
result = df2.groupby("val1").apply(func)
281+
result = df2.groupby("val1", squeeze=True).apply(func)
282282
self.assert_(isinstance(result,Series))
283283

284+
# GH3596, return a consistent type (regression in 0.11 from 0.10.1)
285+
df = DataFrame([[1,1],[1,1]],columns=['X','Y'])
286+
result = df.groupby('X',squeeze=False).count()
287+
self.assert_(isinstance(result,DataFrame))
288+
284289
def test_agg_regression1(self):
285290
grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
286291
result = grouped.agg(np.mean)

0 commit comments

Comments
 (0)