Skip to content

Commit 05f4441

Browse files
jmrrPingviinituutti
authored and committed
DOC: Updating Series.resample and DataFrame.resample docstrings (pandas-dev#23197)
1 parent a8f3abe commit 05f4441

File tree

2 files changed

+140
-80
lines changed

2 files changed

+140
-80
lines changed

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
151151

152152
MSG='Doctests generic.py' ; echo $MSG
153153
pytest -q --doctest-modules pandas/core/generic.py \
154-
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
154+
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs"
155155
RET=$(($RET + $?)) ; echo $MSG "DONE"
156156

157157
MSG='Doctests top-level reshaping functions' ; echo $MSG

pandas/core/generic.py

+139-79
Original file line numberDiff line numberDiff line change
@@ -7500,46 +7500,67 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
75007500
label=None, convention='start', kind=None, loffset=None,
75017501
limit=None, base=0, on=None, level=None):
75027502
"""
7503+
Resample time-series data.
7504+
75037505
Convenience method for frequency conversion and resampling of time
7504-
series. Object must have a datetime-like index (DatetimeIndex,
7505-
PeriodIndex, or TimedeltaIndex), or pass datetime-like values
7506-
to the on or level keyword.
7506+
series. Object must have a datetime-like index (`DatetimeIndex`,
7507+
`PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values
7508+
to the `on` or `level` keyword.
75077509
75087510
Parameters
75097511
----------
7510-
rule : string
7511-
the offset string or object representing target conversion
7512-
axis : int, optional, default 0
7513-
closed : {'right', 'left'}
7512+
rule : str
7513+
The offset string or object representing target conversion.
7514+
how : str
7515+
Method for down/re-sampling, defaults to 'mean' for downsampling.
7516+
7517+
.. deprecated:: 0.18.0
7518+
The new syntax is ``.resample(...).mean()``, or
7519+
``.resample(...).apply(<func>)``.
7520+
axis : {0 or 'index', 1 or 'columns'}, default 0
7521+
Which axis to use for up- or down-sampling. For `Series` this
7522+
will default to 0, i.e. along the rows. Must be
7523+
`DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
7524+
fill_method : str, default None
7525+
Filling method for upsampling.
7526+
7527+
.. deprecated:: 0.18.0
7528+
The new syntax is ``.resample(...).<func>()``,
7529+
e.g. ``.resample(...).pad()``.
7530+
closed : {'right', 'left'}, default None
75147531
Which side of bin interval is closed. The default is 'left'
75157532
for all frequency offsets except for 'M', 'A', 'Q', 'BM',
75167533
'BA', 'BQ', and 'W' which all have a default of 'right'.
7517-
label : {'right', 'left'}
7534+
label : {'right', 'left'}, default None
75187535
Which bin edge label to label bucket with. The default is 'left'
75197536
for all frequency offsets except for 'M', 'A', 'Q', 'BM',
75207537
'BA', 'BQ', and 'W' which all have a default of 'right'.
7521-
convention : {'start', 'end', 's', 'e'}
7522-
For PeriodIndex only, controls whether to use the start or end of
7523-
`rule`
7524-
kind: {'timestamp', 'period'}, optional
7538+
convention : {'start', 'end', 's', 'e'}, default 'start'
7539+
For `PeriodIndex` only, controls whether to use the start or
7540+
end of `rule`.
7541+
kind : {'timestamp', 'period'}, optional, default None
75257542
Pass 'timestamp' to convert the resulting index to a
7526-
``DateTimeIndex`` or 'period' to convert it to a ``PeriodIndex``.
7543+
`DatetimeIndex` or 'period' to convert it to a `PeriodIndex`.
75277544
By default the input representation is retained.
7528-
loffset : timedelta
7529-
Adjust the resampled time labels
7545+
loffset : timedelta, default None
7546+
Adjust the resampled time labels.
7547+
limit : int, default None
7548+
Maximum size gap when reindexing with `fill_method`.
7549+
7550+
.. deprecated:: 0.18.0
75307551
base : int, default 0
75317552
For frequencies that evenly subdivide 1 day, the "origin" of the
75327553
aggregated intervals. For example, for '5min' frequency, base could
7533-
range from 0 through 4. Defaults to 0
7534-
on : string, optional
7554+
range from 0 through 4. Defaults to 0.
7555+
on : str, optional
75357556
For a DataFrame, column to use instead of index for resampling.
75367557
Column must be datetime-like.
75377558
75387559
.. versionadded:: 0.19.0
75397560
7540-
level : string or int, optional
7561+
level : str or int, optional
75417562
For a MultiIndex, level (name or number) to use for
7542-
resampling. Level must be datetime-like.
7563+
resampling. `level` must be datetime-like.
75437564
75447565
.. versionadded:: 0.19.0
75457566
@@ -7556,6 +7577,12 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
75567577
To learn more about the offset strings, please see `this link
75577578
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
75587579
7580+
See Also
7581+
--------
7582+
groupby : Group by mapping, function, label, or list of labels.
7583+
Series.resample : Resample a Series.
7584+
DataFrame.resample : Resample a DataFrame.
7585+
75597586
Examples
75607587
--------
75617588
@@ -7612,7 +7639,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
76127639
76137640
Upsample the series into 30 second bins.
76147641
7615-
>>> series.resample('30S').asfreq()[0:5] #select first 5 rows
7642+
>>> series.resample('30S').asfreq()[0:5] # Select first 5 rows
76167643
2000-01-01 00:00:00 0.0
76177644
2000-01-01 00:00:30 NaN
76187645
2000-01-01 00:01:00 1.0
@@ -7645,8 +7672,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
76457672
Pass a custom function via ``apply``
76467673
76477674
>>> def custom_resampler(array_like):
7648-
... return np.sum(array_like)+5
7649-
7675+
... return np.sum(array_like) + 5
7676+
...
76507677
>>> series.resample('3T').apply(custom_resampler)
76517678
2000-01-01 00:00:00 8
76527679
2000-01-01 00:03:00 17
@@ -7656,73 +7683,106 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
76567683
For a Series with a PeriodIndex, the keyword `convention` can be
76577684
used to control whether to use the start or end of `rule`.
76587685
7686+
Resample a year by quarter using 'start' `convention`. Values are
7687+
assigned to the first quarter of the period.
7688+
76597689
>>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
7660-
freq='A',
7661-
periods=2))
7690+
... freq='A',
7691+
... periods=2))
76627692
>>> s
76637693
2012 1
76647694
2013 2
76657695
Freq: A-DEC, dtype: int64
7666-
7667-
Resample by month using 'start' `convention`. Values are assigned to
7668-
the first month of the period.
7669-
7670-
>>> s.resample('M', convention='start').asfreq().head()
7671-
2012-01 1.0
7672-
2012-02 NaN
7673-
2012-03 NaN
7674-
2012-04 NaN
7675-
2012-05 NaN
7676-
Freq: M, dtype: float64
7677-
7678-
Resample by month using 'end' `convention`. Values are assigned to
7679-
the last month of the period.
7680-
7681-
>>> s.resample('M', convention='end').asfreq()
7682-
2012-12 1.0
7683-
2013-01 NaN
7684-
2013-02 NaN
7685-
2013-03 NaN
7686-
2013-04 NaN
7687-
2013-05 NaN
7688-
2013-06 NaN
7689-
2013-07 NaN
7690-
2013-08 NaN
7691-
2013-09 NaN
7692-
2013-10 NaN
7693-
2013-11 NaN
7694-
2013-12 2.0
7696+
>>> s.resample('Q', convention='start').asfreq()
7697+
2012Q1 1.0
7698+
2012Q2 NaN
7699+
2012Q3 NaN
7700+
2012Q4 NaN
7701+
2013Q1 2.0
7702+
2013Q2 NaN
7703+
2013Q3 NaN
7704+
2013Q4 NaN
7705+
Freq: Q-DEC, dtype: float64
7706+
7707+
Resample quarters by month using 'end' `convention`. Values are
7708+
assigned to the last month of the period.
7709+
7710+
>>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01',
7711+
... freq='Q',
7712+
... periods=4))
7713+
>>> q
7714+
2018Q1 1
7715+
2018Q2 2
7716+
2018Q3 3
7717+
2018Q4 4
7718+
Freq: Q-DEC, dtype: int64
7719+
>>> q.resample('M', convention='end').asfreq()
7720+
2018-03 1.0
7721+
2018-04 NaN
7722+
2018-05 NaN
7723+
2018-06 2.0
7724+
2018-07 NaN
7725+
2018-08 NaN
7726+
2018-09 3.0
7727+
2018-10 NaN
7728+
2018-11 NaN
7729+
2018-12 4.0
76957730
Freq: M, dtype: float64
76967731
7697-
For DataFrame objects, the keyword ``on`` can be used to specify the
7732+
For DataFrame objects, the keyword `on` can be used to specify the
76987733
column instead of the index for resampling.
76997734
7700-
>>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd'])
7701-
>>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T')
7702-
>>> df.resample('3T', on='time').sum()
7703-
a b c d
7704-
time
7705-
2000-01-01 00:00:00 0 3 6 9
7706-
2000-01-01 00:03:00 0 3 6 9
7707-
2000-01-01 00:06:00 0 3 6 9
7708-
7709-
For a DataFrame with MultiIndex, the keyword ``level`` can be used to
7710-
specify on level the resampling needs to take place.
7711-
7712-
>>> time = pd.date_range('1/1/2000', periods=5, freq='T')
7713-
>>> df2 = pd.DataFrame(data=10*[range(4)],
7714-
columns=['a', 'b', 'c', 'd'],
7715-
index=pd.MultiIndex.from_product([time, [1, 2]])
7716-
)
7717-
>>> df2.resample('3T', level=0).sum()
7718-
a b c d
7719-
2000-01-01 00:00:00 0 6 12 18
7720-
2000-01-01 00:03:00 0 4 8 12
7721-
7722-
See also
7723-
--------
7724-
groupby : Group by mapping, function, label, or list of labels.
7735+
>>> d = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19],
7736+
... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]})
7737+
>>> df = pd.DataFrame(d)
7738+
>>> df['week_starting'] = pd.date_range('01/01/2018',
7739+
... periods=8,
7740+
... freq='W')
7741+
>>> df
7742+
price volume week_starting
7743+
0 10 50 2018-01-07
7744+
1 11 60 2018-01-14
7745+
2 9 40 2018-01-21
7746+
3 13 100 2018-01-28
7747+
4 14 50 2018-02-04
7748+
5 18 100 2018-02-11
7749+
6 17 40 2018-02-18
7750+
7 19 50 2018-02-25
7751+
>>> df.resample('M', on='week_starting').mean()
7752+
price volume
7753+
week_starting
7754+
2018-01-31 10.75 62.5
7755+
2018-02-28 17.00 60.0
7756+
7757+
For a DataFrame with MultiIndex, the keyword `level` can be used to
7758+
specify on which level the resampling needs to take place.
7759+
7760+
>>> days = pd.date_range('1/1/2000', periods=4, freq='D')
7761+
>>> d2 = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19],
7762+
... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]})
7763+
>>> df2 = pd.DataFrame(d2,
7764+
... index=pd.MultiIndex.from_product([days,
7765+
... ['morning',
7766+
... 'afternoon']]
7767+
... ))
7768+
>>> df2
7769+
price volume
7770+
2000-01-01 morning 10 50
7771+
afternoon 11 60
7772+
2000-01-02 morning 9 40
7773+
afternoon 13 100
7774+
2000-01-03 morning 14 50
7775+
afternoon 18 100
7776+
2000-01-04 morning 17 40
7777+
afternoon 19 50
7778+
>>> df2.resample('D', level=0).sum()
7779+
price volume
7780+
2000-01-01 21 110
7781+
2000-01-02 22 140
7782+
2000-01-03 32 150
7783+
2000-01-04 36 90
77257784
"""
7785+
77267786
from pandas.core.resample import (resample,
77277787
_maybe_process_deprecations)
77287788
axis = self._get_axis_number(axis)

0 commit comments

Comments
 (0)