Skip to content

Commit 2c79a50

Browse files
stephen-hoover authored and
jreback committed
ENH BinGrouper use BaseGrouper's apply
`BinGrouper.apply` and `BaseGrouper.apply` have different output types. To make them consistent, remove `BinGrouper.apply` so that `BinGrouper` uses the method inherited from its superclass `BaseGrouper`. This requires changing `BinGrouper.groupings` to return a list of `Grouping` objects (there will only ever be one) instead of `None`. closes #12362 closes #11742
1 parent e61241c commit 2c79a50

File tree

3 files changed

+70
-21
lines changed

3 files changed

+70
-21
lines changed

doc/source/whatsnew/v0.18.1.txt

+32
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,47 @@ API changes
7979

8080

8181
- ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`)
82+
8283
- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
8384

85+
.. _whatsnew_0181.apply_resample:
86+
87+
Using ``.apply`` on groupby resampling
88+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
89+
90+
Using ``apply`` on resampling groupby operations (using a ``pd.TimeGrouper``) now has the same output types as similar ``apply`` calls on other groupby operations (:issue:`11742`).
91+
92+
.. ipython:: python
93+
94+
df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), 'value': [10, 13]})
95+
df
8496

97+
Previous Behavior:
8598

99+
.. code-block:: python
86100

101+
In [1]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
102+
Out[1]:
103+
...
104+
TypeError: cannot concatenate a non-NDFrame object
87105

106+
# Output is a Series
107+
In [2]: df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
108+
Out[2]:
109+
date
110+
2000-10-31 value 10
111+
2000-11-30 value 13
112+
dtype: int64
88113

114+
New Behavior:
115+
116+
.. ipython:: python
89117

118+
# Output is a Series
119+
df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x.value.sum())
90120

121+
# Output is a DataFrame
122+
df.groupby(pd.TimeGrouper(key='date', freq='M')).apply(lambda x: x[['value']].sum())
91123

92124

93125
.. _whatsnew_0181.deprecations:

pandas/core/groupby.py

+2-21
Original file line numberDiff line numberDiff line change
@@ -2004,25 +2004,6 @@ def get_iterator(self, data, axis=0):
20042004
if start < length:
20052005
yield self.binlabels[-1], slicer(start, None)
20062006

2007-
def apply(self, f, data, axis=0):
2008-
result_keys = []
2009-
result_values = []
2010-
mutated = False
2011-
for key, group in self.get_iterator(data, axis=axis):
2012-
object.__setattr__(group, 'name', key)
2013-
2014-
# group might be modified
2015-
group_axes = _get_axes(group)
2016-
res = f(group)
2017-
2018-
if not _is_indexed_like(res, group_axes):
2019-
mutated = True
2020-
2021-
result_keys.append(key)
2022-
result_values.append(res)
2023-
2024-
return result_keys, result_values, mutated
2025-
20262007
@cache_readonly
20272008
def indices(self):
20282009
indices = collections.defaultdict(list)
@@ -2071,8 +2052,8 @@ def names(self):
20712052

20722053
@property
20732054
def groupings(self):
2074-
# for compat
2075-
return None
2055+
return [Grouping(lvl, lvl, in_axis=False, level=None, name=name)
2056+
for lvl, name in zip(self.levels, self.names)]
20762057

20772058
def agg_series(self, obj, func):
20782059
dummy = obj[:0]

pandas/tests/test_groupby.py

+36
Original file line numberDiff line numberDiff line change
@@ -4824,6 +4824,42 @@ def test_timegrouper_get_group(self):
48244824
result = grouped.get_group(dt)
48254825
assert_frame_equal(result, expected)
48264826

4827+
def test_timegrouper_apply_return_type_series(self):
4828+
# Using `apply` with the `TimeGrouper` should give the
4829+
# same return type as an `apply` with a `Grouper`.
4830+
# Issue #11742
4831+
df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
4832+
'value': [10, 13]})
4833+
df_dt = df.copy()
4834+
df_dt['date'] = pd.to_datetime(df_dt['date'])
4835+
4836+
def sumfunc_series(x):
4837+
return pd.Series([x['value'].sum()], ('sum',))
4838+
4839+
expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series)
4840+
result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
4841+
.apply(sumfunc_series))
4842+
assert_frame_equal(result.reset_index(drop=True),
4843+
expected.reset_index(drop=True))
4844+
4845+
def test_timegrouper_apply_return_type_value(self):
4846+
# Using `apply` with the `TimeGrouper` should give the
4847+
# same return type as an `apply` with a `Grouper`.
4848+
# Issue #11742
4849+
df = pd.DataFrame({'date': ['10/10/2000', '11/10/2000'],
4850+
'value': [10, 13]})
4851+
df_dt = df.copy()
4852+
df_dt['date'] = pd.to_datetime(df_dt['date'])
4853+
4854+
def sumfunc_value(x):
4855+
return x.value.sum()
4856+
4857+
expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value)
4858+
result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date'))
4859+
.apply(sumfunc_value))
4860+
assert_series_equal(result.reset_index(drop=True),
4861+
expected.reset_index(drop=True))
4862+
48274863
def test_cumcount(self):
48284864
df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
48294865
g = df.groupby('A')

0 commit comments

Comments
 (0)