diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 1f670fb7fb593..c750ffe2065e9 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -460,7 +460,9 @@ standard deviation 1), very concisely: xs_stand.std(1) Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` -preserve the location of NA values: +preserve the location of ``NaN`` values. This is somewhat different from +:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling`. +For more details please see :ref:`this note <stats.moments.expanding.note>`. .. ipython:: python diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 1414d2dd3c8dc..f8adf33ec66d0 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -686,6 +686,8 @@ Method Summary :meth:`~Expanding.cov`, Unbiased covariance (binary) :meth:`~Expanding.corr`, Correlation (binary) +.. currentmodule:: pandas + Aside from not having a ``window`` parameter, these functions have the same interfaces as their ``.rolling`` counterparts. Like above, the parameters they all accept are: @@ -695,18 +697,34 @@ all accept are: ``min_periods`` non-null data points have been seen. - ``center``: boolean, whether to set the labels at the center (default is False) +.. _stats.moments.expanding.note: .. note:: The output of the ``.rolling`` and ``.expanding`` methods do not return a ``NaN`` if there are at least ``min_periods`` non-null values in the current - window. This differs from ``cumsum``, ``cumprod``, ``cummax``, and - ``cummin``, which return ``NaN`` in the output wherever a ``NaN`` is - encountered in the input. + window. This differs from :meth:`~DataFrame.cumsum`, + :meth:`~DataFrame.cumprod`, :meth:`~DataFrame.cummax`, + and :meth:`~DataFrame.cummin`, which return ``NaN`` in the output wherever + a ``NaN`` is encountered in the input. + + Please see the example below. In order to match the output of ``cumsum`` + with ``expanding``, use :meth:`~DataFrame.fillna`. + + .. 
ipython:: python + + sn = pd.Series([1,2,np.nan,3,np.nan,4]) + + sn.expanding().sum() + + sn.cumsum() + + sn.cumsum().fillna(method='ffill') + An expanding window statistic will be more stable (and less responsive) than its rolling window counterpart as the increasing window size decreases the relative impact of an individual data point. As an example, here is the -:meth:`~Expanding.mean` output for the previous time series dataset: +:meth:`~core.window.Expanding.mean` output for the previous time series dataset: .. ipython:: python :suppress: @@ -726,13 +744,14 @@ relative impact of an individual data point. As an example, here is the Exponentially Weighted Windows ------------------------------ +.. currentmodule:: pandas.core.window + A related set of functions are exponentially weighted versions of several of the above statistics. A similar interface to ``.rolling`` and ``.expanding`` is accessed -thru the ``.ewm`` method to receive an :class:`~pandas.core.window.EWM` object. +through the ``.ewm`` method to receive an :class:`~EWM` object. A number of expanding EW (exponentially weighted) methods are provided: -.. currentmodule:: pandas.core.window .. 
csv-table:: :header: "Function", "Description" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8e295174771c4..88cb816a5582e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3296,12 +3296,16 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return self._constructor(new_data).__finalize__(self) def ffill(self, axis=None, inplace=False, limit=None, downcast=None): - """Synonym for NDFrame.fillna(method='ffill')""" + """ + Synonym for :meth:`DataFrame.fillna(method='ffill') <DataFrame.fillna>` + """ return self.fillna(method='ffill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) def bfill(self, axis=None, inplace=False, limit=None, downcast=None): - """Synonym for NDFrame.fillna(method='bfill')""" + """ + Synonym for :meth:`DataFrame.fillna(method='bfill') <DataFrame.fillna>` + """ return self.fillna(method='bfill', axis=axis, inplace=inplace, limit=limit, downcast=downcast) @@ -5359,16 +5363,18 @@ def compound(self, axis=None, skipna=None, level=None): cls.cummin = _make_cum_function( cls, 'cummin', name, name2, axis_descr, "cumulative minimum", - lambda y, axis: np.minimum.accumulate(y, axis), np.inf, np.nan) + lambda y, axis: np.minimum.accumulate(y, axis), "min", + np.inf, np.nan) cls.cumsum = _make_cum_function( cls, 'cumsum', name, name2, axis_descr, "cumulative sum", - lambda y, axis: y.cumsum(axis), 0., np.nan) + lambda y, axis: y.cumsum(axis), "sum", 0., np.nan) cls.cumprod = _make_cum_function( cls, 'cumprod', name, name2, axis_descr, "cumulative product", - lambda y, axis: y.cumprod(axis), 1., np.nan) + lambda y, axis: y.cumprod(axis), "prod", 1., np.nan) cls.cummax = _make_cum_function( cls, 'cummax', name, name2, axis_descr, "cumulative max", - lambda y, axis: np.maximum.accumulate(y, axis), -np.inf, np.nan) + lambda y, axis: np.maximum.accumulate(y, axis), "max", + -np.inf, np.nan) cls.sum = _make_stat_function( cls, 'sum', name, name2, axis_descr, @@ -5556,7 +5562,15 @@ def _doc_parms(cls): Returns ------- 
-%(outname)s : %(name1)s\n""" +%(outname)s : %(name1)s\n + + +See also +-------- +pandas.core.window.Expanding.%(accum_func_name)s : Similar functionality + but ignores ``NaN`` values. + +""" def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f): @@ -5599,10 +5613,10 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, return set_function_name(stat_func, name, cls) -def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func, - mask_a, mask_b): +def _make_cum_function(cls, name, name1, name2, axis_descr, desc, + accum_func, accum_func_name, mask_a, mask_b): @Substitution(outname=name, desc=desc, name1=name1, name2=name2, - axis_descr=axis_descr) + axis_descr=axis_descr, accum_func_name=accum_func_name) @Appender("Return {0} over requested axis.".format(desc) + _cnum_doc) def cum_func(self, axis=None, skipna=True, *args, **kwargs):