From f0859db1242bcb16817b2aaed2496716572c23af Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Fri, 9 Mar 2018 15:36:49 -0300 Subject: [PATCH 1/5] DOC: Resampler.backfill docstring --- pandas/core/resample.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 772568ee84737..aa0ad40a2f86e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -519,12 +519,14 @@ def nearest(self, limit=None): def backfill(self, limit=None): """ - Backward fill the values + Backward fill the values. + + Resample datetimelike data and fill backwards missing values if any. Parameters ---------- limit : integer, optional - limit of how many values to fill + Limit of how many values to fill. Returns ------- @@ -532,8 +534,31 @@ def backfill(self, limit=None): See Also -------- - Series.fillna - DataFrame.fillna + Series.fillna : Fill NA/NaN values in the Series using the specified + method, which can be 'backfill'. + DataFrame.fillna : Fill NA/NaN values in the DataFrame using the + specified method, which can be 'backfill'. + + Examples + -------- + >>> s = pd.Series([1, 2, 3], + ... 
index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + >>> s.resample('15min').backfill(limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 """ return self._upsample('backfill', limit=limit) bfill = backfill From 31f0e0e114f7ce742c5f92c5d6e8cd6ab802f6ed Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Fri, 9 Mar 2018 17:23:41 -0300 Subject: [PATCH 2/5] Add another example and line break --- pandas/core/resample.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index aa0ad40a2f86e..961395478fbe0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -548,6 +548,15 @@ def backfill(self, limit=None): 2018-01-01 01:00:00 2 2018-01-01 02:00:00 3 Freq: H, dtype: int64 + + >>> s.resample('30min').backfill() + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + >>> s.resample('15min').backfill(limit=2) 2018-01-01 00:00:00 1.0 2018-01-01 00:15:00 NaN From 179c4aa3e9e4037466dcb9fe2cfc9036bfbbf9b3 Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Sat, 10 Mar 2018 11:59:56 -0300 Subject: [PATCH 3/5] Add better description, returns description and reference --- pandas/core/resample.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 961395478fbe0..de97920f55016 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -521,7 +521,11 @@ def backfill(self, limit=None): """ Backward fill the values. - Resample datetimelike data and fill backwards missing values if any. 
+ In statistics, imputation is the process of replacing missing data with + substituted values. When resampling data, missing values may appear + (e.g., when the resampling frequency is higher than the original + frequency). The backward fill will replace NA values with the next + non-NA value in the sequence. Parameters ---------- @@ -530,7 +534,8 @@ def backfill(self, limit=None): Returns ------- - an upsampled Series + Series + An upsampled Series with backward filled NA values. See Also -------- @@ -539,6 +544,10 @@ def backfill(self, limit=None): DataFrame.fillna : Fill NA/NaN values in the DataFrame using the specified method, which can be 'backfill'. + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + Examples -------- >>> s = pd.Series([1, 2, 3], From 423893838ff522cc89dd53db5deba58ca88517c6 Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Sat, 10 Mar 2018 12:17:41 -0300 Subject: [PATCH 4/5] Add citation --- pandas/core/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index de97920f55016..68335cf04b9d6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -522,8 +522,8 @@ def backfill(self, limit=None): Backward fill the values. In statistics, imputation is the process of replacing missing data with - substituted values. When resampling data, missing values may appear - (e.g., when the resampling frequency is higher than the original + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original frequency). The backward fill will replace NA values with the next non-NA value in the sequence. 
From 4e616f7342261decc9da365fcd02882c228e5533 Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Sun, 11 Mar 2018 20:30:07 -0300 Subject: [PATCH 5/5] DOC: Improve description, add example --- pandas/core/resample.py | 56 +++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 68335cf04b9d6..4f9c22ca98f1a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -519,13 +519,14 @@ def nearest(self, limit=None): def backfill(self, limit=None): """ - Backward fill the values. + Backward fill the new missing values in the resampled data. In statistics, imputation is the process of replacing missing data with substituted values [1]_. When resampling data, missing values may appear (e.g., when the resampling frequency is higher than the original - frequency). The backward fill will replace NA values with the next - non-NA value in the sequence. + frequency). The backward fill will replace NaN values that appeared in + the resampled data with the next value in the original sequence. + Missing values that existed in the original data will not be modified. Parameters ---------- @@ -534,14 +535,19 @@ def backfill(self, limit=None): Returns ------- - Series - An upsampled Series with backward filled NA values. + Series, DataFrame + An upsampled Series or DataFrame with backward filled NaN values. See Also -------- - Series.fillna : Fill NA/NaN values in the Series using the specified - method, which can be 'backfill'. - DataFrame.fillna : Fill NA/NaN values in the DataFrame using the + bfill : Alias of backfill. + fillna : Fill NaN values using the specified method, which can be + 'backfill'. + nearest : Fill NaN values with nearest neighbor starting from center. + pad : Forward fill NaN values. + pandas.Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'backfill'. 
+ pandas.DataFrame.fillna : Fill NaN values in the DataFrame using the specified method, which can be 'backfill'. References @@ -550,6 +556,9 @@ def backfill(self, limit=None): Examples -------- + + Resampling a Series: + >>> s = pd.Series([1, 2, 3], ... index=pd.date_range('20180101', periods=3, freq='h')) >>> s @@ -577,6 +586,37 @@ def backfill(self, limit=None): 2018-01-01 01:45:00 3.0 2018-01-01 02:00:00 3.0 Freq: 15T, dtype: float64 + + Resampling a DataFrame that has missing values: + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').backfill() + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('15min').backfill(limit=2) + a b + 2018-01-01 00:00:00 2.0 1.0 + 2018-01-01 00:15:00 NaN NaN + 2018-01-01 00:30:00 NaN 3.0 + 2018-01-01 00:45:00 NaN 3.0 + 2018-01-01 01:00:00 NaN 3.0 + 2018-01-01 01:15:00 NaN NaN + 2018-01-01 01:30:00 6.0 5.0 + 2018-01-01 01:45:00 6.0 5.0 + 2018-01-01 02:00:00 6.0 5.0 """ return self._upsample('backfill', limit=limit) bfill = backfill