Add scaffold docstring for groupby.DataFrameGroupBy.resample.

pandres · pandres · commit 985ebd457ba2 · 2018-03-06T13:25:54.000-06:00
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1475,9 +1475,169 @@ def describe(self, **kwargs):
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def resample(self, rule, *args, **kwargs):
-        """
-        Provide resampling when using a TimeGrouper
+        """Provide resampling when using a TimeGrouper.
+
         Return a new grouper with our resampler appended
+
+        Parameters
+        ----------
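+        rule : str
+            Offset string giving the target frequency (e.g. ``'3T'``).
+        *args
+            Extra positional arguments passed through to the resampler.
+        **kwargs
+            Extra keyword arguments passed through to the resampler.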
+
+        Returns
+        -------
+        Grouper
+            A new grouper with our resampler appended.
+
+        Examples
+        --------
+
+        Start by creating a series with 9 one minute timestamps.
+        >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
+        >>> series = pd.Series(range(9), index=index)
+        >>> series
+        2000-01-01 00:00:00    0
+        2000-01-01 00:01:00    1
+        2000-01-01 00:02:00    2
+        2000-01-01 00:03:00    3
+        2000-01-01 00:04:00    4
+        2000-01-01 00:05:00    5
+        2000-01-01 00:06:00    6
+        2000-01-01 00:07:00    7
+        2000-01-01 00:08:00    8
+        Freq: T, dtype: int64
+
+        Downsample the series into 3 minute bins and sum the values
+        of the timestamps falling into a bin.
+        >>> series.resample('3T').sum()
+        2000-01-01 00:00:00     3
+        2000-01-01 00:03:00    12
+        2000-01-01 00:06:00    21
+        Freq: 3T, dtype: int64
+
+        Downsample the series into 3 minute bins as above, but label each
+        bin using the right edge instead of the left. Please note that the
+        value in the bucket used as the label is not included in the bucket
+        which it labels. For example, in the original series the
+        bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
+        value in the resampled bucket with the label ``2000-01-01 00:03:00``
+        does not include 3 (if it did, the summed value would be 6, not 3).
+        To include this value close the right side of the bin interval as
+        illustrated in the example below this one.
+        >>> series.resample('3T', label='right').sum()
+        2000-01-01 00:03:00     3
+        2000-01-01 00:06:00    12
+        2000-01-01 00:09:00    21
+        Freq: 3T, dtype: int64
+
+        Downsample the series into 3 minute bins as above, but close the right
+        side of the bin interval.
+        >>> series.resample('3T', label='right', closed='right').sum()
+        2000-01-01 00:00:00     0
+        2000-01-01 00:03:00     6
+        2000-01-01 00:06:00    15
+        2000-01-01 00:09:00    15
+        Freq: 3T, dtype: int64
+
+        Upsample the series into 30 second bins.
+        >>> series.resample('30S').asfreq()[0:5] #select first 5 rows
+        2000-01-01 00:00:00   0.0
+        2000-01-01 00:00:30   NaN
+        2000-01-01 00:01:00   1.0
+        2000-01-01 00:01:30   NaN
+        2000-01-01 00:02:00   2.0
+        Freq: 30S, dtype: float64
+
+        Upsample the series into 30 second bins and fill the ``NaN``
+        values using the ``pad`` method.
+        >>> series.resample('30S').pad()[0:5]
+        2000-01-01 00:00:00    0
+        2000-01-01 00:00:30    0
+        2000-01-01 00:01:00    1
+        2000-01-01 00:01:30    1
+        2000-01-01 00:02:00    2
+        Freq: 30S, dtype: int64
+
+        Upsample the series into 30 second bins and fill the
+        ``NaN`` values using the ``bfill`` method.
+        >>> series.resample('30S').bfill()[0:5]
+        2000-01-01 00:00:00    0
+        2000-01-01 00:00:30    1
+        2000-01-01 00:01:00    1
+        2000-01-01 00:01:30    2
+        2000-01-01 00:02:00    2
+        Freq: 30S, dtype: int64
+
+        Pass a custom function via ``apply``.
+        >>> def custom_resampler(array_like):
+        ...     return np.sum(array_like)+5
+        >>> series.resample('3T').apply(custom_resampler)
+        2000-01-01 00:00:00     8
+        2000-01-01 00:03:00    17
+        2000-01-01 00:06:00    26
+        Freq: 3T, dtype: int64
+
+        For a Series with a PeriodIndex, the keyword `convention` can be
+        used to control whether to use the start or end of `rule`.
+        >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', freq='A', periods=2))
+        >>> s
+        2012    1
+        2013    2
+        Freq: A-DEC, dtype: int64
+
+        Resample by month using 'start' `convention`. Values are assigned to
+        the first month of the period.
+        >>> s.resample('M', convention='start').asfreq().head()
+        2012-01    1.0
+        2012-02    NaN
+        2012-03    NaN
+        2012-04    NaN
+        2012-05    NaN
+        Freq: M, dtype: float64
+
+        Resample by month using 'end' `convention`. Values are assigned to
+        the last month of the period.
+        >>> s.resample('M', convention='end').asfreq()
+        2012-12    1.0
+        2013-01    NaN
+        2013-02    NaN
+        2013-03    NaN
+        2013-04    NaN
+        2013-05    NaN
+        2013-06    NaN
+        2013-07    NaN
+        2013-08    NaN
+        2013-09    NaN
+        2013-10    NaN
+        2013-11    NaN
+        2013-12    2.0
+        Freq: M, dtype: float64
+
+        For DataFrame objects, the keyword ``on`` can be used to specify the
+        column instead of the index for resampling.
+        >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd'])
+        >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T')
+        >>> df.resample('3T', on='time').sum()
+                             a  b  c  d
+        time
+        2000-01-01 00:00:00  0  3  6  9
+        2000-01-01 00:03:00  0  3  6  9
+        2000-01-01 00:06:00  0  3  6  9
+
+        For a DataFrame with MultiIndex, the keyword ``level`` can be used to
+        specify on which level the resampling needs to take place.
+        >>> time = pd.date_range('1/1/2000', periods=5, freq='T')
+
+        >>> df2 = pd.DataFrame(data=10*[range(4)], columns=['a', 'b', 'c', 'd'], index=pd.MultiIndex.from_product([time, [1, 2]]) )
+        >>> df2.resample('3T', level=0).sum()
+                             a  b   c   d
+        2000-01-01 00:00:00  0  6  12  18
+        2000-01-01 00:03:00  0  4   8  12
+
         """
         from pandas.core.resample import get_resampler_for_grouping
         return get_resampler_for_grouping(self, rule, *args, **kwargs)