@@ -139,6 +139,111 @@ def _last(x):
139
139
else :
140
140
return _last (x )
141
141
142
class Grouper(object):
    """
    A Grouper allows the user to specify a groupby instruction

    Parameters
    ----------
    key : groupby key, default None
        when given, must be an item of the info axis of the grouped object
    level : name, int level number, default None
        cannot be combined with ``key``
    freq : string / frequency object, default None
        when passed as a keyword, a TimeGrouper is constructed instead
    axis : number / name of the axis, default None
        None is treated as axis 0 when the axis is looked up
    sort : boolean, whether to sort the resulting labels, default True

    Returns
    -------
    A specification for a groupby instruction

    Examples
    --------
    df.groupby(Grouper(key='A')) : syntactic sugar for df.groupby('A')
    df.groupby(Grouper(key='date',freq='60s')) : specify a resample on the column 'date'
    df.groupby(Grouper(level='date',freq='60s',axis=1)) :
        specify a resample on the level 'date' on the columns axis with a frequency of 60s

    """

    def __new__(cls, *args, **kwargs):
        # Only a ``freq`` passed by keyword triggers the dispatch to
        # TimeGrouper; positional arguments are not inspected.
        if kwargs.get('freq') is not None:
            # NOTE(review): imported lazily, presumably to avoid a circular
            # import with pandas.tseries.resample -- confirm.
            from pandas.tseries.resample import TimeGrouper
            cls = TimeGrouper
        return super(Grouper, cls).__new__(cls)

    def __init__(self, key=None, level=None, freq=None, axis=None, sort=True):
        self.key = key
        self.level = level
        self.freq = freq
        self.axis = axis
        self.sort = sort
        # nothing in this class assigns a grouper; it stays None until
        # something else sets it
        self.grouper = None

    def get_grouper(self, obj):
        """
        Parameters
        ----------
        obj : the subject object

        Returns
        -------
        a tuple of binner, grouper, obj (possibly sorted)
        """

        # default is to not use a binner
        return None, self.get_grouper_for_ax(obj), obj

    def get_grouper_for_ax(self, obj):
        """
        given an object and the specifications, return a grouper for this
        particular specification

        Parameters
        ----------
        obj : the subject object

        Returns
        -------
        grouper : an index mapping, or a BinGrouper like object

        Raises
        ------
        ValueError
            if both a key and a level are specified, or if the level is not
            valid for a non-MultiIndex axis
        KeyError
            if the key is not found in the info axis of ``obj``
        """

        if self.key is not None and self.level is not None:
            raise ValueError("The Grouper cannot specify both a key and a level!")

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            if key not in obj._info_axis:
                raise KeyError("The grouper name {0} is not found".format(key))
            ax = Index(obj[key], name=key)

        else:
            # an unspecified axis falls back to the first axis
            axis = 0 if self.axis is None else self.axis
            ax = obj._get_axis(axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a mi level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    # resolve a level name or number against the MultiIndex
                    # itself, so that every level can be selected and the
                    # result carries the name of that level
                    level = ax._get_level_number(level)
                    ax = Index(ax.get_level_values(level), name=ax.names[level])

                else:
                    if not (level == 0 or level == ax.name):
                        raise ValueError("The grouper level {0} is not valid".format(level))

        return self._get_grouper_for_ax(ax)

    def _get_grouper_for_ax(self, ax):
        # the base implementation groups directly by the selected axis
        return ax

    @property
    def groups(self):
        # delegates to the underlying grouper
        return self.grouper.groups
142
247
143
248
class GroupBy (PandasObject ):
144
249
@@ -882,10 +987,9 @@ def _is_indexed_like(obj, axes):
882
987
return False
883
988
884
989
885
- class Grouper (object ):
886
-
990
+ class BaseGrouper (object ):
887
991
"""
888
-
992
+ This is an internal Grouper class, which actually holds the generated groups
889
993
"""
890
994
891
995
def __init__ (self , axis , groupings , sort = True , group_keys = True ):
@@ -1328,19 +1432,7 @@ def generate_bins_generic(values, binner, closed):
1328
1432
1329
1433
return bins
1330
1434
1331
-
1332
- class CustomGrouper (object ):
1333
-
1334
- def get_grouper (self , obj ):
1335
- raise NotImplementedError
1336
-
1337
- # delegates
1338
- @property
1339
- def groups (self ):
1340
- return self .grouper .groups
1341
-
1342
-
1343
- class BinGrouper (Grouper ):
1435
+ class BinGrouper (BaseGrouper ):
1344
1436
1345
1437
def __init__ (self , bins , binlabels , filter_empty = False ):
1346
1438
self .bins = com ._ensure_int64 (bins )
@@ -1495,7 +1587,7 @@ class Grouping(object):
1495
1587
* groups : dict of {group -> label_list}
1496
1588
"""
1497
1589
1498
- def __init__ (self , index , grouper = None , obj = None , axis = 0 , name = None , level = None ,
1590
+ def __init__ (self , index , grouper = None , obj = None , name = None , level = None ,
1499
1591
sort = True ):
1500
1592
1501
1593
self .name = name
@@ -1515,6 +1607,10 @@ def __init__(self, index, grouper=None, obj=None, axis=0, name=None, level=None,
1515
1607
self ._was_factor = False
1516
1608
self ._should_compress = True
1517
1609
1610
+ # we have a single grouper which may be a myriad of things, some of which are
1611
+ # dependent on the level that was passed in
1612
+ #
1613
+
1518
1614
if level is not None :
1519
1615
if not isinstance (level , int ):
1520
1616
if level not in index .names :
@@ -1556,7 +1652,10 @@ def __init__(self, index, grouper=None, obj=None, axis=0, name=None, level=None,
1556
1652
else :
1557
1653
if isinstance (self .grouper , (list , tuple )):
1558
1654
self .grouper = com ._asarray_tuplesafe (self .grouper )
1655
+
1656
+ # a passed Categorical
1559
1657
elif isinstance (self .grouper , Categorical ):
1658
+
1560
1659
factor = self .grouper
1561
1660
self ._was_factor = True
1562
1661
@@ -1568,27 +1667,10 @@ def __init__(self, index, grouper=None, obj=None, axis=0, name=None, level=None,
1568
1667
if self .name is None :
1569
1668
self .name = factor .name
1570
1669
1571
- # a passed TimeGrouper like
1572
- elif isinstance (self .grouper , CustomGrouper ):
1573
-
1574
- # get the obj to work on
1575
- if self .grouper .name is not None :
1576
- name = self .grouper .name
1577
- if name not in obj ._info_axis :
1578
- raise KeyError ("The grouper name {0} is not found" .format (name ))
1579
- ax = Index (obj [name ],name = name )
1580
- else :
1581
- ax = obj ._get_axis (axis )
1582
- if self .grouper .level is not None :
1583
- level = self .grouper .level
1584
- if isinstance (ax , MultiIndex ):
1585
- level = ax ._get_level_name (level )
1586
- ax = Index (ax .get_level_values (level ), name = level )
1587
- else :
1588
- if not (level == 0 or level == ax .name ):
1589
- raise ValueError ("The grouper level {0} is not valid" .format (level ))
1670
+ # a passed Grouper like
1671
+ elif isinstance (self .grouper , Grouper ):
1590
1672
1591
- self .grouper = self .grouper ._get_grouper_for_ax ( ax )
1673
+ self .grouper = self .grouper .get_grouper_for_ax ( obj )
1592
1674
if self .name is None :
1593
1675
self .name = self .grouper .name
1594
1676
@@ -1674,10 +1756,10 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
1674
1756
level = None
1675
1757
key = group_axis
1676
1758
1677
- if isinstance (key , CustomGrouper ):
1759
+ if isinstance (key , Grouper ):
1678
1760
binner , gpr , obj = key .get_grouper (obj )
1679
1761
return gpr , [], obj
1680
- elif isinstance (key , Grouper ):
1762
+ elif isinstance (key , BaseGrouper ):
1681
1763
return key , [], obj
1682
1764
1683
1765
if not isinstance (key , (tuple , list )):
@@ -1730,13 +1812,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
1730
1812
errmsg = "Categorical grouper must have len(grouper) == len(data)"
1731
1813
raise AssertionError (errmsg )
1732
1814
1733
- ping = Grouping (group_axis , gpr , obj = obj , axis = axis , name = name , level = level , sort = sort )
1815
+ ping = Grouping (group_axis , gpr , obj = obj , name = name , level = level , sort = sort )
1734
1816
groupings .append (ping )
1735
1817
1736
1818
if len (groupings ) == 0 :
1737
1819
raise ValueError ('No group keys passed!' )
1738
1820
1739
- grouper = Grouper (group_axis , groupings , sort = sort )
1821
+ # create the internals grouper
1822
+ grouper = BaseGrouper (group_axis , groupings , sort = sort )
1740
1823
1741
1824
return grouper , exclusions , obj
1742
1825
0 commit comments