Skip to content

Commit 9ed9c37

Browse files
committed
Merge pull request #6655 from jreback/groupby
DOC/API: pd.Grouper docs / api
2 parents 5322695 + 0719f21 commit 9ed9c37

File tree

6 files changed

+42
-29
lines changed

6 files changed

+42
-29
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,7 @@ Indexing, iteration
11931193
GroupBy.groups
11941194
GroupBy.indices
11951195
GroupBy.get_group
1196+
Grouper
11961197

11971198
Function application
11981199
~~~~~~~~~~~~~~~~~~~~

doc/source/release.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ API Changes
132132
``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments
133133
will not be supported in a future release (:issue:`5505`)
134134

135-
- Allow specification of a more complex groupby, via ``pd.Groupby`` (:issue:`3794`)
135+
- Allow specification of a more complex groupby, via ``pd.Grouper`` (:issue:`3794`)
136136

137137
- A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of
138138
the index, rather than requiring a list of tuple (:issue:`4370`)

doc/source/v0.14.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ These are out-of-bounds selections
9494

9595
g.nth(0, dropna='any') # similar to old behaviour
9696

97-
- Allow specification of a more complex groupby via ``pd.Groupby``, such as grouping
97+
- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
9898
by a Time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)
9999

100100
- Local variable usage has changed in

pandas/core/groupby.py

+27-15
Original file line numberDiff line numberDiff line change
@@ -141,25 +141,37 @@ def _last(x):
141141

142142
class Grouper(object):
143143
"""
144-
A Grouper allows the user to specify a groupby instruction
144+
A Grouper allows the user to specify a groupby instruction for a target object
145+
146+
This specification will select a column via the key parameter, or if the level and/or
147+
axis parameters are given, a level of the index of the target object.
148+
149+
These are local specifications and will override 'global' settings, that is the parameters
150+
axis and level which are passed to the groupby itself.
145151
146152
Parameters
147153
----------
148-
key : groupby key, default None
149-
level : name, int level number, default None
150-
freq : string / freqency object, default None
151-
sort : boolean, whether to sort the resulting labels, default True
154+
key : string, defaults to None
155+
groupby key, which selects the grouping column of the target
156+
level : name/number, defaults to None
157+
the level for the target index
158+
freq : string / frequency object, defaults to None
159+
This will groupby the specified frequency if the target selection (via key or level) is
160+
a datetime-like object
161+
axis : number/name of the axis, defaults to None
162+
sort : boolean, defaults to False
163+
whether to sort the resulting labels
152164
153165
Returns
154166
-------
155167
A specification for a groupby instruction
156168
157169
Examples
158170
--------
159-
df.groupby(Group(key='A')) : syntatic sugar for df.groupby('A')
160-
df.groupby(Group(key='date',freq='60s')) : specify a resample on the column 'date'
161-
df.groupby(Group(level='date',freq='60s',axis=1)) :
162-
specify a resample on the level 'date' on the columns axis with a frequency of 60s
171+
>>> df.groupby(Grouper(key='A')) : syntactic sugar for df.groupby('A')
172+
>>> df.groupby(Grouper(key='date',freq='60s')) : specify a resample on the column 'date'
173+
>>> df.groupby(Grouper(level='date',freq='60s',axis=1)) :
174+
specify a resample on the level 'date' on the columns axis with a frequency of 60s
163175
164176
"""
165177

@@ -186,7 +198,7 @@ def __init__(self, key=None, level=None, freq=None, axis=None, sort=False):
186198
def ax(self):
187199
return self.grouper
188200

189-
def get_grouper(self, obj):
201+
def _get_grouper(self, obj):
190202

191203
"""
192204
Parameters
@@ -198,10 +210,10 @@ def get_grouper(self, obj):
198210
a tuple of binner, grouper, obj (possibly sorted)
199211
"""
200212

201-
self.set_grouper(obj)
213+
self._set_grouper(obj)
202214
return self.binner, self.grouper, self.obj
203215

204-
def set_grouper(self, obj, sort=False):
216+
def _set_grouper(self, obj, sort=False):
205217
"""
206218
given an object and the specifications, set up the internal grouper for this particular specification
207219
@@ -252,7 +264,7 @@ def set_grouper(self, obj, sort=False):
252264
self.grouper = ax
253265
return self.grouper
254266

255-
def get_binner_for_grouping(self, obj):
267+
def _get_binner_for_grouping(self, obj):
256268
raise NotImplementedError
257269

258270
@property
@@ -1685,7 +1697,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
16851697
elif isinstance(self.grouper, Grouper):
16861698

16871699
# get the new grouper
1688-
grouper = self.grouper.get_binner_for_grouping(self.obj)
1700+
grouper = self.grouper._get_binner_for_grouping(self.obj)
16891701
self.obj = self.grouper.obj
16901702
self.grouper = grouper
16911703
if self.name is None:
@@ -1795,7 +1807,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
17951807

17961808
# a passed in Grouper, directly convert
17971809
if isinstance(key, Grouper):
1798-
binner, grouper, obj = key.get_grouper(obj)
1810+
binner, grouper, obj = key._get_grouper(obj)
17991811
return grouper, [], obj
18001812

18011813
# already have a BaseGrouper, just return it

pandas/tseries/resample.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
7676
super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
7777

7878
def resample(self, obj):
79-
self.set_grouper(obj, sort=True)
79+
self._set_grouper(obj, sort=True)
8080
ax = self.grouper
8181

8282
if isinstance(ax, DatetimeIndex):
@@ -93,7 +93,7 @@ def resample(self, obj):
9393
rs = self._resample_periods()
9494
else:
9595
obj = self.obj.to_timestamp(how=self.convention)
96-
self.set_grouper(obj)
96+
self._set_grouper(obj)
9797
rs = self._resample_timestamps()
9898
elif len(ax) == 0:
9999
return self.obj
@@ -104,11 +104,11 @@ def resample(self, obj):
104104
rs_axis.name = ax.name
105105
return rs
106106

107-
def get_grouper(self, obj):
108-
self.set_grouper(obj)
109-
return self.get_binner_for_resample()
107+
def _get_grouper(self, obj):
108+
self._set_grouper(obj)
109+
return self._get_binner_for_resample()
110110

111-
def get_binner_for_resample(self):
111+
def _get_binner_for_resample(self):
112112
# create the BinGrouper
113113
# assume that self._set_grouper(obj) has already been called
114114

@@ -121,12 +121,12 @@ def get_binner_for_resample(self):
121121
self.grouper = BinGrouper(bins, binlabels)
122122
return self.binner, self.grouper, self.obj
123123

124-
def get_binner_for_grouping(self, obj):
124+
def _get_binner_for_grouping(self, obj):
125125
# return an ordering of the transformed group labels,
126126
# suitable for multi-grouping, e.g the labels for
127127
# the resampled intervals
128-
ax = self.set_grouper(obj)
129-
self.get_binner_for_resample()
128+
ax = self._set_grouper(obj)
129+
self._get_binner_for_resample()
130130

131131
# create the grouper
132132
binner = self.binner
@@ -233,7 +233,7 @@ def _resample_timestamps(self):
233233
# assumes self._set_grouper(obj) has already been called
234234
axlabels = self.ax
235235

236-
self.get_binner_for_resample()
236+
self._get_binner_for_resample()
237237
grouper = self.grouper
238238
binner = self.binner
239239
obj = self.obj

pandas/tseries/tests/test_resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1134,7 +1134,7 @@ def test_apply_iteration(self):
11341134
df = DataFrame({'open': 1, 'close': 2}, index=ind)
11351135
tg = TimeGrouper('M')
11361136

1137-
_, grouper, _ = tg.get_grouper(df)
1137+
_, grouper, _ = tg._get_grouper(df)
11381138

11391139
# Errors
11401140
grouped = df.groupby(grouper, group_keys=False)
@@ -1151,7 +1151,7 @@ def test_panel_aggregation(self):
11511151
minor_axis=['A', 'B', 'C', 'D'])
11521152

11531153
tg = TimeGrouper('M', axis=1)
1154-
_, grouper, _ = tg.get_grouper(wp)
1154+
_, grouper, _ = tg._get_grouper(wp)
11551155
bingrouped = wp.groupby(grouper)
11561156
binagg = bingrouped.mean()
11571157

0 commit comments

Comments
 (0)