DOC: update the pandas.core.resample.Resampler.fillna docstring (#20379)

prcastro · TomAugspurger · commit 670c2e437e90 · 2018-03-17T14:49:51.000-05:00
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -624,18 +624,160 @@ def backfill(self, limit=None):
 
     def fillna(self, method, limit=None):
         """
-        Fill missing values
+        Fill missing values introduced by upsampling.
+
+        In statistics, imputation is the process of replacing missing data with
+        substituted values [1]_. When resampling data, missing values may
+        appear (e.g., when the resampling frequency is higher than the original
+        frequency).
+
+        Missing values that existed in the orginal data will
+        not be modified.
 
         Parameters
         ----------
-        method : str, method of resampling ('ffill', 'bfill')
+        method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
+            Method to use for filling holes in resampled data
+
+            * 'pad' or 'ffill': use previous valid observation to fill gap
+              (forward fill).
+            * 'backfill' or 'bfill': use next valid observation to fill gap.
+            * 'nearest': use nearest valid observation to fill gap.
+
         limit : integer, optional
-            limit of how many values to fill
+            Limit of how many consecutive missing values to fill.
+
+        Returns
+        -------
+        Series or DataFrame
+            An upsampled Series or DataFrame with missing values filled.
 
         See Also
         --------
-        Series.fillna
-        DataFrame.fillna
+        backfill : Backward fill NaN values in the resampled data.
+        pad : Forward fill NaN values in the resampled data.
+        nearest : Fill NaN values in the resampled data
+            with nearest neighbor starting from center.
+        pandas.Series.fillna : Fill NaN values in the Series using the
+            specified method, which can be 'bfill' and 'ffill'.
+        pandas.DataFrame.fillna : Fill NaN values in the DataFrame using the
+            specified method, which can be 'bfill' and 'ffill'.
+
+        Examples
+        --------
+        Resampling a Series:
+
+        >>> s = pd.Series([1, 2, 3],
+        ...               index=pd.date_range('20180101', periods=3, freq='h'))
+        >>> s
+        2018-01-01 00:00:00    1
+        2018-01-01 01:00:00    2
+        2018-01-01 02:00:00    3
+        Freq: H, dtype: int64
+
+        Without filling the missing values you get:
+
+        >>> s.resample("30min").asfreq()
+        2018-01-01 00:00:00    1.0
+        2018-01-01 00:30:00    NaN
+        2018-01-01 01:00:00    2.0
+        2018-01-01 01:30:00    NaN
+        2018-01-01 02:00:00    3.0
+        Freq: 30T, dtype: float64
+
+        >>> s.resample('30min').fillna("backfill")
+        2018-01-01 00:00:00    1
+        2018-01-01 00:30:00    2
+        2018-01-01 01:00:00    2
+        2018-01-01 01:30:00    3
+        2018-01-01 02:00:00    3
+        Freq: 30T, dtype: int64
+
+        >>> s.resample('15min').fillna("backfill", limit=2)
+        2018-01-01 00:00:00    1.0
+        2018-01-01 00:15:00    NaN
+        2018-01-01 00:30:00    2.0
+        2018-01-01 00:45:00    2.0
+        2018-01-01 01:00:00    2.0
+        2018-01-01 01:15:00    NaN
+        2018-01-01 01:30:00    3.0
+        2018-01-01 01:45:00    3.0
+        2018-01-01 02:00:00    3.0
+        Freq: 15T, dtype: float64
+
+        >>> s.resample('30min').fillna("pad")
+        2018-01-01 00:00:00    1
+        2018-01-01 00:30:00    1
+        2018-01-01 01:00:00    2
+        2018-01-01 01:30:00    2
+        2018-01-01 02:00:00    3
+        Freq: 30T, dtype: int64
+
+        >>> s.resample('30min').fillna("nearest")
+        2018-01-01 00:00:00    1
+        2018-01-01 00:30:00    2
+        2018-01-01 01:00:00    2
+        2018-01-01 01:30:00    3
+        2018-01-01 02:00:00    3
+        Freq: 30T, dtype: int64
+
+        Missing values present before the upsampling are not affected.
+
+        >>> sm = pd.Series([1, None, 3],
+        ...               index=pd.date_range('20180101', periods=3, freq='h'))
+        >>> sm
+        2018-01-01 00:00:00    1.0
+        2018-01-01 01:00:00    NaN
+        2018-01-01 02:00:00    3.0
+        Freq: H, dtype: float64
+
+        >>> sm.resample('30min').fillna('backfill')
+        2018-01-01 00:00:00    1.0
+        2018-01-01 00:30:00    NaN
+        2018-01-01 01:00:00    NaN
+        2018-01-01 01:30:00    3.0
+        2018-01-01 02:00:00    3.0
+        Freq: 30T, dtype: float64
+
+        >>> sm.resample('30min').fillna('pad')
+        2018-01-01 00:00:00    1.0
+        2018-01-01 00:30:00    1.0
+        2018-01-01 01:00:00    NaN
+        2018-01-01 01:30:00    NaN
+        2018-01-01 02:00:00    3.0
+        Freq: 30T, dtype: float64
+
+        >>> sm.resample('30min').fillna('nearest')
+        2018-01-01 00:00:00    1.0
+        2018-01-01 00:30:00    NaN
+        2018-01-01 01:00:00    NaN
+        2018-01-01 01:30:00    3.0
+        2018-01-01 02:00:00    3.0
+        Freq: 30T, dtype: float64
+
+        DataFrame resampling is done column-wise. All the same options are
+        available.
+
+        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
+        ...                   index=pd.date_range('20180101', periods=3,
+        ...                                       freq='h'))
+        >>> df
+                               a  b
+        2018-01-01 00:00:00  2.0  1
+        2018-01-01 01:00:00  NaN  3
+        2018-01-01 02:00:00  6.0  5
+
+        >>> df.resample('30min').fillna("bfill")
+                               a  b
+        2018-01-01 00:00:00  2.0  1
+        2018-01-01 00:30:00  NaN  3
+        2018-01-01 01:00:00  NaN  3
+        2018-01-01 01:30:00  6.0  5
+        2018-01-01 02:00:00  6.0  5
+
+        References
+        ----------
+        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)
         """
         return self._upsample(method, limit=limit)