diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index b5c15f83bb9d3..cc5ebc730f94a 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -707,6 +707,54 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
     data.groupby(factor).mean()
 
+.. _groupby.specify:
+
+Grouping with a Grouper specification
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You may need to specify a bit more data to group properly. You can
+use ``pd.Grouper`` to provide this local control.
+
+.. ipython:: python
+
+   import datetime as DT
+
+   df = DataFrame({
+       'Branch' : 'A A A A A A A B'.split(),
+       'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
+       'Quantity': [1,3,5,1,8,1,9,3],
+       'Date' : [
+           DT.datetime(2013,1,1,13,0),
+           DT.datetime(2013,1,1,13,5),
+           DT.datetime(2013,10,1,20,0),
+           DT.datetime(2013,10,2,10,0),
+           DT.datetime(2013,10,1,20,0),
+           DT.datetime(2013,10,2,10,0),
+           DT.datetime(2013,12,2,12,0),
+           DT.datetime(2013,12,2,14,0),
+       ]})
+
+   df
+
+Group by a specific column with the desired frequency. This is like resampling.
+
+.. ipython:: python
+
+   df.groupby([pd.Grouper(freq='1M',key='Date'),'Buyer']).sum()
+
+When the specification is ambiguous, for example when a named index and a column
+could both be valid groupers, use ``key`` to group by a column and ``level`` to
+group by an index level.
+
+.. ipython:: python
+
+   df = df.set_index('Date')
+   df['Date'] = df.index + pd.offsets.MonthEnd(2)
+   df.groupby([pd.Grouper(freq='6M',key='Date'),'Buyer']).sum()
+
+   df.groupby([pd.Grouper(freq='6M',level='Date'),'Buyer']).sum()
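+
+If you pass only a single ``Grouper``, the grouping is effectively a resample.
+An illustrative example, reusing the ``df`` built above:
+
+.. ipython:: python
+
+   df.groupby(pd.Grouper(freq='6M',key='Date')).sum()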
+
+
+.. _groupby.nth:
+
 Taking the first rows of each group
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -753,7 +801,7 @@ To select from a DataFrame or Series the nth item, use the nth method:
 
     g.nth(-1)
 
-If you want to select the nth not-null method, use the dropna kwarg. For a DataFrame this should be either 'any' or 'all' just like you would pass to dropna, for a Series this just needs to be truthy.
+If you want to select the nth not-null item, use the dropna kwarg. For a DataFrame this should be either 'any' or 'all', just as you would pass to dropna; for a Series this just needs to be truthy.
 
 .. ipython:: python
@@ -787,6 +835,9 @@ To see the order in which each row appears within its group, use the
 Examples
 --------
 
+Regrouping by factor
+~~~~~~~~~~~~~~~~~~~~
+
 Regroup columns of a DataFrame according to their sum, and sum the aggregated ones.
 
 .. ipython:: python
@@ -796,6 +847,9 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on
 
     df.groupby(df.sum(), axis=1).sum()
 
+Returning a Series to propagate names
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 Group DataFrame columns, compute a set of metrics and return a named Series.
 The Series name is used as the name for the column index. This is especially
 useful in conjunction with reshaping operations such as stacking in which the
@@ -808,7 +862,7 @@ column index name will be used as the name of the inserted column:
         'b': [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],
         'c': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
         'd': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
-        })
+    })
 
     def compute_metrics(x):
         result = {'b_sum': x['b'].sum(), 'c_mean': x['c'].mean()}
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 4890f22e98468..f5108effba48a 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -125,6 +125,8 @@ API Changes
   ``DataFrame.stack`` operations where the name of the column index is used as
   the name of the inserted column containing the pivoted data.
 
+- Allow specification of a more complex groupby via ``pd.Grouper`` (:issue:`3794`)
+
 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index cfee48d62928b..0613d56604844 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -11,6 +11,7 @@ Highlights include:
 
 - MultIndexing Using Slicers
 - Joining a singly-indexed DataFrame with a multi-indexed DataFrame
+- More flexible groupby specifications
 
 API changes
 ~~~~~~~~~~~
@@ -80,7 +81,7 @@ These are out-of-bounds selections
   g[['B']].head(1)
 
 - groupby ``nth`` now filters by default, with optional dropna argument to ignore
-  NaN (to replicate the previous behaviour.)
+  NaN (to replicate the previous behaviour). See :ref:`the docs <groupby.nth>`.
 
   .. ipython:: python
@@ -90,6 +91,9 @@ These are out-of-bounds selections
 
     g.nth(0, dropna='any')  # similar to old behaviour
 
+- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
+  by a time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)
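+
+  A minimal illustration (the frame below is made up for this example):
+
+  .. ipython:: python
+
+     import datetime as DT
+
+     df = DataFrame({'Buyer': 'Carl Mark Carl Joe'.split(),
+                     'Quantity': [1, 3, 5, 8],
+                     'Date': [DT.datetime(2013, 9, 1, 13, 0),
+                              DT.datetime(2013, 9, 1, 13, 5),
+                              DT.datetime(2013, 10, 1, 20, 0),
+                              DT.datetime(2013, 10, 3, 10, 0)]})
+
+     df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer']).sum()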
 
 - Local variable usage has changed in
   :func:`pandas.eval`/:meth:`DataFrame.eval`/:meth:`DataFrame.query`
   (:issue:`5987`). For the :class:`~pandas.DataFrame` methods, two things have
@@ -121,6 +125,7 @@ These are out-of-bounds selections
   .. ipython:: python
 
     i[[0,1,2]].astype(np.int_)
+
 - ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example,
   the old behavior returned an Index in this case (:issue:`6459`):
diff --git a/pandas/core/api.py b/pandas/core/api.py
index 4d8d4dcda7589..3ebcb46cd98fa 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -6,6 +6,7 @@
 from pandas.core.algorithms import factorize, match, unique, value_counts
 from pandas.core.common import isnull, notnull
 from pandas.core.categorical import Categorical
+from pandas.core.groupby import Grouper
 from pandas.core.format import set_eng_float_format
 from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 86590d2319447..7bf20d71cb301 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -139,6 +139,125 @@ def _last(x):
     else:
         return _last(x)
 
+class Grouper(object):
+    """
+    A Grouper allows the user to specify a groupby instruction
+
+    Parameters
+    ----------
+    key : groupby key, default None
+    level : name, int level number, default None
+    freq : string / frequency object, default None
+    axis : number or name of the axis, default None
+    sort : boolean, whether to sort the resulting labels, default True
+
+    Returns
+    -------
+    A specification for a groupby instruction
+
+    Examples
+    --------
+    df.groupby(Grouper(key='A')) : syntactic sugar for df.groupby('A')
+    df.groupby(Grouper(key='date',freq='60s')) : specify a resample on the column 'date'
+    df.groupby(Grouper(level='date',freq='60s',axis=1)) :
+        specify a resample on the level 'date' on the columns axis with a frequency of 60s
+
+    """
+
+    def __new__(cls, *args, **kwargs):
+        if kwargs.get('freq') is not None:
+            from pandas.tseries.resample import TimeGrouper
+            cls = TimeGrouper
+        return super(Grouper, cls).__new__(cls)
+
+    def __init__(self, key=None, level=None, freq=None, axis=None, sort=True):
+        self.key = key
+        self.level = level
+        self.freq = freq
+        self.axis = axis
+        self.sort = sort
+
+        self.grouper = None
+        self.obj = None
+        self.indexer = None
+        self.binner = None
+
+    @property
+    def ax(self):
+        return self.grouper
+
+    def get_grouper(self, obj):
+        """
+        Parameters
+        ----------
+        obj : the subject object
+
+        Returns
+        -------
+        a tuple of (binner, grouper, obj), where obj is possibly sorted
+        """
+
+        self.set_grouper(obj)
+        return self.binner, self.grouper, self.obj
+
+    def set_grouper(self, obj):
+        """
+        given an object and the specifications, set up the internal grouper
+        for this particular specification
+
+        Parameters
+        ----------
+        obj : the subject object
+
+        """
+
+        if self.key is not None and self.level is not None:
+            raise ValueError("The Grouper cannot specify both a key and a level!")
+
+        # the key must be a valid info item
+        if self.key is not None:
+            key = self.key
+            if key not in obj._info_axis:
+                raise KeyError("The grouper name {0} is not found".format(key))
+            ax = Index(obj[key], name=key)
+
+        else:
+            ax = obj._get_axis(self.axis)
+            if self.level is not None:
+                level = self.level
+
+                # if a level is given it must be a mi level or
+                # equivalent to the axis name
+                if isinstance(ax, MultiIndex):
+
+                    if isinstance(level, compat.string_types):
+                        if obj.index.name != level:
+                            raise ValueError('level name %s is not the name of the '
+                                             'index' % level)
+                    elif level > 0:
+                        raise ValueError('level > 0 only valid with MultiIndex')
+                    ax = Index(ax.get_level_values(level), name=level)
+
+                else:
+                    if not (level == 0 or level == ax.name):
+                        raise ValueError("The grouper level {0} is not valid".format(level))
+
+        # possibly sort
+        if not ax.is_monotonic:
+            indexer = self.indexer = ax.argsort(kind='quicksort')
+            ax = ax.take(indexer)
+            obj = obj.take(indexer, axis=self.axis, convert=False, is_copy=False)
+
+        self.obj = obj
+        self.grouper = ax
+        return self.grouper
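+
+    # note: when set_grouper has to sort a non-monotonic axis, self.indexer
+    # records the sort order; TimeGrouper.get_binner_for_grouping later uses
+    # it to reorder the per-row labels it computes on the sorted axis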
+
+    def get_binner_for_grouping(self, obj):
+        raise NotImplementedError
+
+    @property
+    def groups(self):
+        return self.grouper.groups
 
 class GroupBy(PandasObject):
@@ -882,10 +1001,9 @@ def _is_indexed_like(obj, axes):
 
     return False
 
-class Grouper(object):
-
+class BaseGrouper(object):
     """
-
+    This is an internal Grouper class, which actually holds the generated groups
     """
 
     def __init__(self, axis, groupings, sort=True, group_keys=True):
@@ -1328,19 +1446,7 @@ def generate_bins_generic(values, binner, closed):
 
     return bins
 
-
-class CustomGrouper(object):
-
-    def get_grouper(self, obj):
-        raise NotImplementedError
-
-    # delegates
-    @property
-    def groups(self):
-        return self.grouper.groups
-
-
-class BinGrouper(Grouper):
+class BinGrouper(BaseGrouper):
 
     def __init__(self, bins, binlabels, filter_empty=False):
         self.bins = com._ensure_int64(bins)
@@ -1479,6 +1585,7 @@ class Grouping(object):
     ----------
     index : Index
     grouper :
+    obj :
     name :
     level :
 
@@ -1493,7 +1600,7 @@ class Grouping(object):
     * groups : dict of {group -> label_list}
     """
 
-    def __init__(self, index, grouper=None, name=None, level=None,
+    def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                  sort=True):
 
         self.name = name
@@ -1513,6 +1620,10 @@ def __init__(self, index, grouper=None, name=None, level=None,
         self._was_factor = False
         self._should_compress = True
 
+        # we have a single grouper which may be a myriad of things, some of
+        # which are dependent on the passed-in level
+
         if level is not None:
             if not isinstance(level, int):
                 if level not in index.names:
@@ -1554,7 +1665,10 @@ def __init__(self, index, grouper=None, name=None, level=None,
         else:
             if isinstance(self.grouper, (list, tuple)):
                 self.grouper = com._asarray_tuplesafe(self.grouper)
+
+            # a passed Categorical
             elif isinstance(self.grouper, Categorical):
+
                 factor = self.grouper
                 self._was_factor = True
 
@@ -1566,6 +1680,15 @@ def __init__(self, index, grouper=None, name=None, level=None,
                 if self.name is None:
                     self.name = factor.name
 
+            # a passed Grouper-like object
+            elif isinstance(self.grouper, Grouper):
+
+                # get the new grouper
+                grouper = self.grouper.get_binner_for_grouping(obj)
+                self.grouper = grouper
+                if self.name is None:
+                    self.name = grouper.name
+
             # no level passed
             if not isinstance(self.grouper, (Series, np.ndarray)):
                 self.grouper = self.index.map(self.grouper)
@@ -1634,8 +1757,28 @@ def groups(self):
 
 def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
+    """
+    create and return a BaseGrouper, which is an internal
+    mapping of how to create the grouper indexers.
+    This may be composed of multiple Grouping objects, indicating
+    multiple groupers
+
+    Groupers are ultimately index mappings. They can originate as:
+    index mappings, keys to columns, functions, or Groupers
+
+    Groupers enable local references to axis, level, and sort, while
+    the passed-in axis, level, and sort are 'global'.
+
+    This routine tries to figure out what the passed-in references
+    are and then creates a Grouping for each one, combined into
+    a BaseGrouper.
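+
+    For example (an illustration, mirroring the docs),
+    df.groupby([pd.Grouper(freq='1M', key='date'), 'buyer']) combines a
+    frequency-based Grouper on the 'date' column with a plain column key.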
+
+    """
+
     group_axis = obj._get_axis(axis)
 
+    # validate that the passed level is compatible with the passed
+    # axis of the object
     if level is not None:
         if not isinstance(group_axis, MultiIndex):
             if isinstance(level, compat.string_types):
@@ -1648,10 +1791,13 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
             level = None
             key = group_axis
 
-    if isinstance(key, CustomGrouper):
-        binner, gpr, obj = key.get_grouper(obj)
-        return gpr, [], obj
-    elif isinstance(key, Grouper):
+    # a passed-in Grouper, directly convert
+    if isinstance(key, Grouper):
+        binner, grouper, obj = key.get_grouper(obj)
+        return grouper, [], obj
+
+    # already have a BaseGrouper, just return it
+    elif isinstance(key, BaseGrouper):
         return key, [], obj
 
     if not isinstance(key, (tuple, list)):
@@ -1704,13 +1850,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
             errmsg = "Categorical grouper must have len(grouper) == len(data)"
             raise AssertionError(errmsg)
 
-        ping = Grouping(group_axis, gpr, name=name, level=level, sort=sort)
+        ping = Grouping(group_axis, gpr, obj=obj, name=name, level=level, sort=sort)
 
         groupings.append(ping)
 
     if len(groupings) == 0:
         raise ValueError('No group keys passed!')
 
-    grouper = Grouper(group_axis, groupings, sort=sort)
+    # create the internals grouper
+    grouper = BaseGrouper(group_axis, groupings, sort=sort)
 
     return grouper, exclusions, obj
diff --git a/pandas/core/series.py b/pandas/core/series.py
index bc5566ce4baa1..dd11b7bec9216 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2031,7 +2031,7 @@ def reindex_axis(self, labels, axis=0, **kwargs):
             raise ValueError("cannot reindex series on non-zero axis!")
         return self.reindex(index=labels, **kwargs)
 
-    def take(self, indices, axis=0, convert=True):
+    def take(self, indices, axis=0, convert=True, is_copy=False):
         """
         Analogous to ndarray.take, return Series corresponding to requested
         indices
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index adca8389b8939..4d47750660800 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -2903,6 +2903,144 @@ def test_groupby_with_timegrouper_methods(self):
         self.assertTrue(isinstance(groups,dict))
         self.assertTrue(len(groups) == 3)
 
+    def test_timegrouper_with_reg_groups(self):
+
+        # GH 3794
+        # allow combination of timegrouper/reg groups
+
+        import datetime as DT
+
+        df = DataFrame({
+            'Branch' : 'A A A A A A A B'.split(),
+            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
+            'Quantity': [1,3,5,1,8,1,9,3],
+            'Date' : [
+                DT.datetime(2013,1,1,13,0),
+                DT.datetime(2013,1,1,13,5),
+                DT.datetime(2013,10,1,20,0),
+                DT.datetime(2013,10,2,10,0),
+                DT.datetime(2013,10,1,20,0),
+                DT.datetime(2013,10,2,10,0),
+                DT.datetime(2013,12,2,12,0),
+                DT.datetime(2013,12,2,14,0),
+            ]}).set_index('Date')
+
+        expected = DataFrame({
+            'Buyer': 'Carl Joe Mark'.split(),
+            'Quantity': [10,18,3],
+            'Date' : [
+                DT.datetime(2013,12,31,0,0),
+                DT.datetime(2013,12,31,0,0),
+                DT.datetime(2013,12,31,0,0),
+            ]}).set_index(['Date','Buyer'])
+
+        result = df.groupby([pd.Grouper(freq='A'),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+
+        expected = DataFrame({
+            'Buyer': 'Carl Mark Carl Joe'.split(),
+            'Quantity': [1,3,9,18],
+            'Date' : [
+                DT.datetime(2013,1,1,0,0),
+                DT.datetime(2013,1,1,0,0),
+                DT.datetime(2013,7,1,0,0),
+                DT.datetime(2013,7,1,0,0),
+            ]}).set_index(['Date','Buyer'])
+
+        result = df.groupby([pd.Grouper(freq='6MS'),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+
+        df = DataFrame({
+            'Branch' : 'A A A A A A A B'.split(),
+            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
+            'Quantity': [1,3,5,1,8,1,9,3],
+            'Date' : [
+                DT.datetime(2013,10,1,13,0),
+                DT.datetime(2013,10,1,13,5),
+                DT.datetime(2013,10,1,20,0),
+                DT.datetime(2013,10,2,10,0),
+                DT.datetime(2013,10,1,20,0),
+                DT.datetime(2013,10,2,10,0),
+                DT.datetime(2013,10,2,12,0),
+                DT.datetime(2013,10,2,14,0),
+            ]}).set_index('Date')
+
+        expected = DataFrame({
+            'Buyer': 'Carl Joe Mark Carl Joe'.split(),
+            'Quantity': [6,8,3,4,10],
+            'Date' : [
+                DT.datetime(2013,10,1,0,0),
+                DT.datetime(2013,10,1,0,0),
+                DT.datetime(2013,10,1,0,0),
+                DT.datetime(2013,10,2,0,0),
+                DT.datetime(2013,10,2,0,0),
+            ]}).set_index(['Date','Buyer'])
+
+        result = df.groupby([pd.Grouper(freq='1D'),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+
+        result = df.groupby([pd.Grouper(freq='1M'),'Buyer']).sum()
+        expected = DataFrame({
+            'Buyer': 'Carl Joe Mark'.split(),
+            'Quantity': [10,18,3],
+            'Date' : [
+                DT.datetime(2013,10,31,0,0),
+                DT.datetime(2013,10,31,0,0),
+                DT.datetime(2013,10,31,0,0),
+            ]}).set_index(['Date','Buyer'])
+        assert_frame_equal(result,expected)
+
+        # passing the name
+        df = df.reset_index()
+        result = df.groupby([pd.Grouper(freq='1M',key='Date'),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+
+        self.assertRaises(KeyError, lambda : df.groupby([pd.Grouper(freq='1M',key='foo'),'Buyer']).sum())
+
+        # passing the level
+        df = df.set_index('Date')
+        result = df.groupby([pd.Grouper(freq='1M',level='Date'),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+        result = df.groupby([pd.Grouper(freq='1M',level=0),'Buyer']).sum()
+        assert_frame_equal(result,expected)
+
+        self.assertRaises(ValueError, lambda : df.groupby([pd.Grouper(freq='1M',level='foo'),'Buyer']).sum())
+
+        # multi names
+        df = df.copy()
+        df['Date'] = df.index + pd.offsets.MonthEnd(2)
+        result = df.groupby([pd.Grouper(freq='1M',key='Date'),'Buyer']).sum()
+        expected = DataFrame({
+            'Buyer': 'Carl Joe Mark'.split(),
+            'Quantity': [10,18,3],
+            'Date' : [
+                DT.datetime(2013,11,30,0,0),
+                DT.datetime(2013,11,30,0,0),
+                DT.datetime(2013,11,30,0,0),
+            ]}).set_index(['Date','Buyer'])
+        assert_frame_equal(result,expected)
+
+        # error as we have both a level and a key!
+        self.assertRaises(ValueError, lambda : df.groupby([pd.Grouper(freq='1M',key='Date',level='Date'),'Buyer']).sum())
+
+        # single groupers
+        expected = DataFrame({ 'Quantity' : [31],
+                               'Date' : [DT.datetime(2013,10,31,0,0)] }).set_index('Date')
+        result = df.groupby(pd.Grouper(freq='1M')).sum()
+        assert_frame_equal(result, expected)
+
+        result = df.groupby([pd.Grouper(freq='1M')]).sum()
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame({ 'Quantity' : [31],
+                               'Date' : [DT.datetime(2013,11,30,0,0)] }).set_index('Date')
+        result = df.groupby(pd.Grouper(freq='1M',key='Date')).sum()
+        assert_frame_equal(result, expected)
+
+        result = df.groupby([pd.Grouper(freq='1M',key='Date')]).sum()
+        assert_frame_equal(result, expected)
+
     def test_cumcount(self):
         df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
         g = df.groupby('A')
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index e4221fdea083c..8ab7063eada17 100644
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pandas.core.groupby import BinGrouper, CustomGrouper
+from pandas.core.groupby import BinGrouper, Grouper
 from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod
 from pandas.tseries.index import DatetimeIndex, date_range
 from pandas.tseries.offsets import DateOffset, Tick, _delta_to_nanoseconds
@@ -18,7 +18,7 @@
 _DEFAULT_METHOD = 'mean'
 
 
-class TimeGrouper(CustomGrouper):
+class TimeGrouper(Grouper):
     """
     Custom groupby class for time-interval grouping
 
@@ -39,11 +39,11 @@ class TimeGrouper(CustomGrouper):
 
     def __init__(self, freq='Min', closed=None, label=None, how='mean',
                  nperiods=None, axis=0,
                  fill_method=None, limit=None, loffset=None, kind=None,
-                 convention=None, base=0):
-        self.freq = to_offset(freq)
+                 convention=None, base=0, **kwargs):
+        freq = to_offset(freq)
 
         end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'])
-        rule = self.freq.rule_code
+        rule = freq.rule_code
         if (rule in end_types or
                 ('-' in rule and rule[:rule.find('-')] in end_types)):
             if closed is None:
@@ -64,19 +64,23 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
         self.convention = convention or 'E'
         self.convention = self.convention.lower()
 
-        self.axis = axis
         self.loffset = loffset
         self.how = how
         self.fill_method = fill_method
         self.limit = limit
         self.base = base
 
+        # by definition we always sort
+        kwargs['sort'] = True
+
+        super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
+
     def resample(self, obj):
-        ax = obj._get_axis(self.axis)
+        self.set_grouper(obj)
+        ax = self.grouper
 
-        obj = self._ensure_sortedness(obj)
         if isinstance(ax, DatetimeIndex):
-            rs = self._resample_timestamps(obj)
+            rs = self._resample_timestamps()
         elif isinstance(ax, PeriodIndex):
             offset = to_offset(self.freq)
             if offset.n > 1:
@@ -86,12 +90,13 @@ def resample(self, obj):
                 self.kind = 'timestamp'
 
             if self.kind is None or self.kind == 'period':
-                rs = self._resample_periods(obj)
+                rs = self._resample_periods()
             else:
-                obj = obj.to_timestamp(how=self.convention)
-                rs = self._resample_timestamps(obj)
+                obj = self.obj.to_timestamp(how=self.convention)
+                self.set_grouper(obj)
+                rs = self._resample_timestamps()
         elif len(ax) == 0:
-            return obj
+            return self.obj
         else:  # pragma: no cover
             raise TypeError('Only valid with DatetimeIndex or PeriodIndex')
 
@@ -100,30 +105,41 @@ def resample(self, obj):
         return rs
 
     def get_grouper(self, obj):
-        # return a tuple of (binner, grouper, obj)
-        return self._get_time_grouper(obj)
-
-    def _ensure_sortedness(self, obj):
-        # ensure that our object is sorted
-        ax = obj._get_axis(self.axis)
-        if not ax.is_monotonic:
-            try:
-                obj = obj.sort_index(axis=self.axis)
-            except TypeError:
-                obj = obj.sort_index()
-        return obj
-
-    def _get_time_grouper(self, obj):
-        obj = self._ensure_sortedness(obj)
-        ax = obj._get_axis(self.axis)
+        self.set_grouper(obj)
+        return self.get_binner_for_resample()
 
+    def get_binner_for_resample(self):
+        # create the BinGrouper
+        # assume that self.set_grouper(obj) has already been called
+
+        ax = self.ax
         if self.kind is None or self.kind == 'timestamp':
-            binner, bins, binlabels = self._get_time_bins(ax)
+            self.binner, bins, binlabels = self._get_time_bins(ax)
         else:
-            binner, bins, binlabels = self._get_time_period_bins(ax)
-
-        grouper = BinGrouper(bins, binlabels)
-        return binner, grouper, obj
+            self.binner, bins, binlabels = self._get_time_period_bins(ax)
+
+        self.grouper = BinGrouper(bins, binlabels)
+        return self.binner, self.grouper, self.obj
+
+    def get_binner_for_grouping(self, obj):
+        # return an ordering of the transformed group labels,
+        # suitable for multi-grouping, e.g. the labels for
+        # the resampled intervals
+        ax = self.set_grouper(obj)
+        self.get_binner_for_resample()
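+
+        # repeat each bin's label once per row in that bin so we end up with
+        # one label per original row; e.g. (illustrative) bins of sizes
+        # [2, 1] with labels [Jan 31, Feb 28] yield [Jan 31, Jan 31, Feb 28]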
+        # create the grouper
+        binner = self.binner
+        l = []
+        for key, group in self.grouper.get_iterator(ax):
+            l.extend([key] * len(group))
+        grouper = binner.__class__(l, freq=binner.freq, name=binner.name)
+
+        # since we may have had to sort
+        # may need to reorder groups here
+        if self.indexer is not None:
+            grouper = grouper.take(self.indexer)
+        return grouper
 
     def _get_time_bins(self, ax):
         if not isinstance(ax, DatetimeIndex):
@@ -213,10 +229,14 @@ def _get_time_period_bins(self, ax):
     def _agg_method(self):
         return self.how if self.how else _DEFAULT_METHOD
 
-    def _resample_timestamps(self, obj):
-        axlabels = obj._get_axis(self.axis)
+    def _resample_timestamps(self):
+        # assumes set_grouper(obj) already called
+        axlabels = self.ax
 
-        binner, grouper, _ = self._get_time_grouper(obj)
+        self.get_binner_for_resample()
+        grouper = self.grouper
+        binner = self.binner
+        obj = self.obj
 
         # Determine if we're downsampling
         if axlabels.freq is not None or axlabels.inferred_freq is not None:
@@ -256,8 +276,10 @@ def _resample_timestamps(self, obj):
 
         return result
 
-    def _resample_periods(self, obj):
-        axlabels = obj._get_axis(self.axis)
+    def _resample_periods(self):
+        # assumes set_grouper(obj) already called
+        axlabels = self.ax
+        obj = self.obj
 
         if len(axlabels) == 0:
             new_index = PeriodIndex(data=[], freq=self.freq)
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
index 23b8905b2ae9a..20c6724726955 100644
--- a/pandas/tseries/tests/test_resample.py
+++ b/pandas/tseries/tests/test_resample.py
@@ -1137,7 +1137,6 @@ def test_apply_iteration(self):
         _, grouper, _ = tg.get_grouper(df)
 
         # Errors
-
         grouped = df.groupby(grouper, group_keys=False)
         f = lambda df: df['close'] / df['open']