Skip to content

Commit 6994240

Browse files
committed
ENH: allow .rolling / .expanding as groupby methods
closes #12738 closes #12486 closes #12363 Author: Jeff Reback <[email protected]> Closes #12743 from jreback/expand and squashes the following commits: f98e6f8 [Jeff Reback] ENH: allow .rolling / .expanding as groupby methods
1 parent 1d7c1e3 commit 6994240

File tree

12 files changed

+719
-132
lines changed

12 files changed

+719
-132
lines changed

doc/source/whatsnew/v0.18.1.txt

+54
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Highlights include:
1818

1919
.. _whatsnew_0181.new_features:
2020

21+
- ``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group; see :ref:`here <whatsnew_0181.deferred_ops>`.
22+
2123
New features
2224
~~~~~~~~~~~~
2325

@@ -48,6 +50,55 @@ see :ref:`Custom Business Hour <timeseries.custombusinesshour>` (:issue:`11514`)
4850
Enhancements
4951
~~~~~~~~~~~~
5052

53+
.. _whatsnew_0181.deferred_ops:
54+
55+
``.groupby(..)`` syntax with window and resample operations
56+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
57+
58+
``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group (:issue:`12486`, :issue:`12738`).
59+
60+
You can now use ``.rolling(..)`` and ``.expanding(..)`` as methods on groupbys. These return another deferred object (similar to what ``.rolling()`` and ``.expanding()`` do on ungrouped pandas objects). You can then operate on these ``RollingGroupby`` objects in a similar manner.
61+
62+
Previously you would have to do this to get a rolling window mean per-group:
63+
64+
.. ipython:: python
65+
66+
df = pd.DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
67+
'B': np.arange(40)})
68+
df
69+
70+
.. ipython:: python
71+
72+
df.groupby('A').apply(lambda x: x.rolling(4).B.mean())
73+
74+
Now you can do:
75+
76+
.. ipython:: python
77+
78+
df.groupby('A').rolling(4).B.mean()
79+
80+
For ``.resample(..)`` types of operations, previously you would have to do:
81+
82+
.. ipython:: python
83+
84+
df = pd.DataFrame({'date': pd.date_range(start='2016-01-01',
85+
periods=4,
86+
freq='W'),
87+
'group': [1, 1, 2, 2],
88+
'val': [5, 6, 7, 8]}).set_index('date')
89+
90+
df
91+
92+
.. ipython:: python
93+
94+
df.groupby('group').apply(lambda x: x.resample('1D').ffill())
95+
96+
Now you can do:
97+
98+
.. ipython:: python
99+
100+
df.groupby('group').resample('1D').ffill()
101+
51102
.. _whatsnew_0181.partial_string_indexing:
52103

53104
Partial string indexing on ``DatetimeIndex`` when part of a ``MultiIndex``
@@ -374,6 +425,9 @@ Bug Fixes
374425
- Bug in ``pd.crosstab()`` where it would silently ignore ``aggfunc`` if ``values=None`` (:issue:`12569`).
375426

376427

428+
- Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`)
429+
430+
377431
- Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`)
378432
- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum ``timedeltas`` as recognized by Pandas. See :ref:`documentation <timedeltas.limitations>`. (:issue:`12727`)
379433
- Bug in ``.quantile()`` with interpolation may coerce to ``float`` unexpectedly (:issue:`12772`)

pandas/core/base.py

+60
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,19 @@ def _aggregate_multiple_funcs(self, arg, _level):
613613

614614
return concat(results, keys=keys, axis=1)
615615

616+
def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
617+
""" return a new object with the replacement attributes """
618+
if obj is None:
619+
obj = self._selected_obj.copy()
620+
if obj_type is None:
621+
obj_type = self._constructor
622+
if isinstance(obj, obj_type):
623+
obj = obj.obj
624+
for attr in self._attributes:
625+
if attr not in kwargs:
626+
kwargs[attr] = getattr(self, attr)
627+
return obj_type(obj, **kwargs)
628+
616629
def _is_cython_func(self, arg):
617630
""" if we define an internal function for this argument, return it """
618631
return self._cython_table.get(arg)
@@ -625,6 +638,53 @@ def _is_builtin_func(self, arg):
625638
return self._builtin_table.get(arg, arg)
626639

627640

641+
class GroupByMixin(object):
642+
""" provide the groupby facilities to the mixed object """
643+
644+
@staticmethod
645+
def _dispatch(name, *args, **kwargs):
646+
""" dispatch to apply """
647+
def outer(self, *args, **kwargs):
648+
def f(x):
649+
x = self._shallow_copy(x, groupby=self._groupby)
650+
return getattr(x, name)(*args, **kwargs)
651+
return self._groupby.apply(f)
652+
outer.__name__ = name
653+
return outer
654+
655+
def _gotitem(self, key, ndim, subset=None):
656+
"""
657+
sub-classes to define
658+
return a sliced object
659+
660+
Parameters
661+
----------
662+
key : string / list of selections
663+
ndim : 1,2
664+
requested ndim of result
665+
subset : object, default None
666+
subset to act on
667+
"""
668+
669+
# create a new object to prevent aliasing
670+
if subset is None:
671+
subset = self.obj
672+
673+
# we need to make a shallow copy of ourselves
674+
# with the same groupby
675+
kwargs = dict([(attr, getattr(self, attr))
676+
for attr in self._attributes])
677+
self = self.__class__(subset,
678+
groupby=self._groupby[key],
679+
parent=self,
680+
**kwargs)
681+
self._reset_cache()
682+
if subset.ndim == 2:
683+
if lib.isscalar(key) and key in subset or com.is_list_like(key):
684+
self._selection = key
685+
return self
686+
687+
628688
class FrozenList(PandasObject, list):
629689

630690
"""

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -3711,7 +3711,7 @@ def clip_lower(self, threshold, axis=None):
37113711
return self.where(subset, threshold, axis=axis)
37123712

37133713
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
3714-
group_keys=True, squeeze=False):
3714+
group_keys=True, squeeze=False, **kwargs):
37153715
"""
37163716
Group series using mapper (dict or key function, apply given function
37173717
to group, return result as series) or by a series of columns.
@@ -3763,7 +3763,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
37633763
raise TypeError("You have to supply one of 'by' and 'level'")
37643764
axis = self._get_axis_number(axis)
37653765
return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
3766-
sort=sort, group_keys=group_keys, squeeze=squeeze)
3766+
sort=sort, group_keys=group_keys, squeeze=squeeze,
3767+
**kwargs)
37673768

37683769
def asfreq(self, freq, method=None, how=None, normalize=False):
37693770
"""

0 commit comments

Comments
 (0)