From 7ee027269783e0bc36d9a9661fa4cdbde1ec9281 Mon Sep 17 00:00:00 2001 From: Winterflower Date: Tue, 4 Aug 2015 22:04:34 +0100 Subject: [PATCH] Add more examples to Series.resample Add more resample examples Improve Series examples Series resample fixes Fixed breaking docstring issue --- pandas/core/generic.py | 100 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 273c444b30b80..ce6e66c4bc209 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3274,7 +3274,107 @@ def resample(self, rule, how=None, axis=0, fill_method=None, For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for '5min' frequency, base could range from 0 through 4. Defaults to 0 + + + Examples + -------- + + Start by creating a series with 9 one-minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=9, freq='T') + >>> series = pd.Series(range(9), index=index) + >>> series + 2000-01-01 00:00:00 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:02:00 2 + 2000-01-01 00:03:00 3 + 2000-01-01 00:04:00 4 + 2000-01-01 00:05:00 5 + 2000-01-01 00:06:00 6 + 2000-01-01 00:07:00 7 + 2000-01-01 00:08:00 8 + Freq: T, dtype: int64 + + Downsample the series into 3-minute bins and sum the values + of the timestamps falling into a bin. + + >>> series.resample('3T', how='sum') + 2000-01-01 00:00:00 3 + 2000-01-01 00:03:00 12 + 2000-01-01 00:06:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3-minute bins as above, but label each + bin using the right edge instead of the left. Please note that the + value in the bucket used as the label is not included in the bucket + which it labels. For example, in the original series the + bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed + value in the resampled bucket with the label ``2000-01-01 00:03:00`` + does not include 3 (if it did, the summed value would be 6, not 3). 
+ To include this value, close the right side of the bin interval as + illustrated in the example below this one. + + >>> series.resample('3T', how='sum', label='right') + 2000-01-01 00:03:00 3 + 2000-01-01 00:06:00 12 + 2000-01-01 00:09:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3-minute bins as above, but close the right + side of the bin interval. + + >>> series.resample('3T', how='sum', label='right', closed='right') + 2000-01-01 00:00:00 0 + 2000-01-01 00:03:00 6 + 2000-01-01 00:06:00 15 + 2000-01-01 00:09:00 15 + Freq: 3T, dtype: int64 + + Upsample the series into 30-second bins. + + >>> series.resample('30S')[0:5] # select first 5 rows + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: float64 + + Upsample the series into 30-second bins and fill the ``NaN`` + values using the ``pad`` method. + + >>> series.resample('30S', fill_method='pad')[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 1 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Upsample the series into 30-second bins and fill the + ``NaN`` values using the ``bfill`` method. + + >>> series.resample('30S', fill_method='bfill')[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 1 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 2 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Pass a custom function to ``how``. + + >>> def custom_resampler(array_like): + ... return np.sum(array_like)+5 + + >>> series.resample('3T', how=custom_resampler) + 2000-01-01 00:00:00 8 + 2000-01-01 00:03:00 17 + 2000-01-01 00:06:00 26 + Freq: 3T, dtype: int64 + """ + from pandas.tseries.resample import TimeGrouper axis = self._get_axis_number(axis) sampler = TimeGrouper(rule, label=label, closed=closed, how=how,