diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 06afa440aa26c..a64542fa71705 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -209,19 +209,12 @@ Window Functions .. currentmodule:: pandas.core.window -.. warning:: - - Prior to version 0.18.0, ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` were module level - functions and are now deprecated. These are replaced by using the :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`. objects and a corresponding method call. - - The deprecation warning will show the new syntax, see an example :ref:`here `. - -For working with data, a number of windows functions are provided for +For working with data, a number of window functions are provided for computing common *window* or *rolling* statistics. Among these are count, sum, mean, median, correlation, variance, covariance, standard deviation, skewness, and kurtosis. -Starting in version 0.18.1, the ``rolling()`` and ``expanding()`` +The ``rolling()`` and ``expanding()`` functions can be used directly from DataFrameGroupBy objects, see the :ref:`groupby docs `. diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 592c0788070a1..2bd2bb199bf1f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -361,6 +361,8 @@ Removal of prior version deprecations/changes - The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attribute:`Categorical.codes` (:issue:`7768`) - The ``flavor`` parameter have been removed from func:`to_sql` method (:issue:`13611`) - The modules `pandas.tools.hashing` and `pandas.util.hashing` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). + Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) .. _whatsnew_0230.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index 78501620d780b..97ae73174c09c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -41,7 +41,6 @@ from pandas.core.api import * from pandas.core.sparse.api import * -from pandas.stats.api import * from pandas.tseries.api import * from pandas.core.computation.api import * from pandas.core.reshape.api import * diff --git a/pandas/stats/__init__.py b/pandas/stats/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/stats/api.py b/pandas/stats/api.py deleted file mode 100644 index 2a11456d4f9e5..0000000000000 --- a/pandas/stats/api.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Common namespace of statistical functions -""" - -# flake8: noqa - -from pandas.stats.moments import * diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py deleted file mode 100644 index 1cd98feb05ea0..0000000000000 --- a/pandas/stats/moments.py +++ /dev/null @@ -1,855 +0,0 @@ -""" -Provides rolling statistical moments and related descriptive -statistics implemented in Cython -""" -from __future__ import division - -import warnings -import numpy as np -from pandas.core.dtypes.common import is_scalar -from pandas.core.api import DataFrame, Series -from pandas.util._decorators import Substitution, Appender - -__all__ = ['rolling_count', 'rolling_max', 'rolling_min', - 'rolling_sum', 'rolling_mean', 'rolling_std', 'rolling_cov', - 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', - 'rolling_quantile', 'rolling_median', 'rolling_apply', - 'rolling_window', - 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov', - 'expanding_count', 'expanding_max', 'expanding_min', - 'expanding_sum', 'expanding_mean', 'expanding_std', - 'expanding_cov', 'expanding_corr', 'expanding_var', - 'expanding_skew', 'expanding_kurt', 'expanding_quantile', - 'expanding_median', 'expanding_apply'] - -# ----------------------------------------------------------------------------- -# Docs - -# The order of arguments for the _doc_template is: -# (header, args, kwargs, returns, notes) - -_doc_template = """ -%s - -Parameters ----------- -%s%s -Returns -------- -%s -%s -""" - -_roll_kw = """window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. -min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -center : boolean, default False - Set the labels at the center of the window. -how : string, default '%s' - Method for down- or re-sampling -""" - -_roll_notes = r""" -Notes ------ -By default, the result is set to the right edge of the window. This can be -changed to the center of the window by setting ``center=True``. - -The `freq` keyword is used to conform time series data to a specified -frequency by resampling the data. This is done with the default parameters -of :meth:`~pandas.Series.resample` (i.e. using the `mean`). -""" - - -_ewm_kw = r"""com : float, optional - Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0` -span : float, optional - Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1` -halflife : float, optional - Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0` -alpha : float, optional - Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1` - - .. versionadded:: 0.18.0 - -min_periods : int, default 0 - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : None or string alias / date offset object, default=None - Frequency to conform to before computing statistic -adjust : boolean, default True - Divide by decaying adjustment factor in beginning periods to account for - imbalance in relative weightings (viewing EWMA as a moving average) -how : string, default 'mean' - Method for down- or re-sampling -ignore_na : boolean, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior -""" - -_ewm_notes = r""" -Notes ------ -Exactly one of center of mass, span, half-life, and alpha must be provided. -Allowed values and relationship between the parameters are specified in the -parameter descriptions above; see the link at the end of this section for -a detailed explanation. - -When adjust is True (default), weighted averages are calculated using weights - (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - -When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - -When ignore_na is False (default), weights are based on absolute positions. -For example, the weights of x and y used in calculating the final weighted -average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and -(1-alpha)**2 and alpha (if adjust is False). - -When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based on -relative positions. For example, the weights of x and y used in calculating -the final weighted average of [x, None, y] are 1-alpha and 1 (if adjust is -True), and 1-alpha and alpha (if adjust is False). - -More details can be found at -http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows -""" - -_expanding_kw = """min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). -freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the statistic. Specified - as a frequency string or DateOffset object. -""" - - -_type_of_input_retval = "y : type of input argument" - -_flex_retval = """y : type depends on inputs - DataFrame / DataFrame -> DataFrame (matches on columns) or Panel (pairwise) - DataFrame / Series -> Computes result for each column - Series / Series -> Series""" - -_pairwise_retval = "y : Panel whose items are df1.index values" - -_unary_arg = "arg : Series, DataFrame\n" - -_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray, optional - if not supplied then will default to arg1 and produce pairwise output -""" - -_binary_arg = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray -""" - -_pairwise_arg = """df1 : DataFrame -df2 : DataFrame -""" - -_pairwise_kw = """pairwise : bool, default False - If False then only matching columns between arg1 and arg2 will be used and - the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the output - will be a Panel in the case of DataFrame inputs. In the case of missing - elements, only complete pairwise observations will be used. -""" - -_ddof_kw = """ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. -""" - -_bias_kw = r"""bias : boolean, default False - Use a standard estimation bias correction -""" - - -def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs): - """ - wrapper function to dispatch to the appropriate window functions - wraps/unwraps ndarrays for compat - - can be removed when ndarray support is removed - """ - is_ndarray = isinstance(arg, np.ndarray) - if is_ndarray: - if arg.ndim == 1: - arg = Series(arg) - elif arg.ndim == 2: - arg = DataFrame(arg) - else: - raise AssertionError("cannot support ndim > 2 for ndarray compat") - - warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and " - "will be removed " - "in a future version" - .format(dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - # get the functional keywords here - if func_kw is None: - func_kw = [] - kwds = {} - for k in func_kw: - value = kwargs.pop(k, None) - if value is not None: - kwds[k] = value - - # TODO: the below is only in place temporary until this module is removed. - kwargs.pop('freq', None) # freq removed in 0.23 - # how is a keyword that if not-None should be in kwds - how = kwargs.pop('how', None) - if how is not None: - kwds['how'] = how - - r = getattr(arg, dispatch)(**kwargs) - - if not is_ndarray: - - # give a helpful deprecation message - # with copy-pastable arguments - pargs = ','.join("{a}={b}".format(a=a, b=b) - for a, b in kwargs.items() if b is not None) - aargs = ','.join(args) - if len(aargs): - aargs += ',' - - def f(a, b): - if is_scalar(b): - return "{a}={b}".format(a=a, b=b) - return "{a}=<{b}>".format(a=a, b=type(b).__name__) - aargs = ','.join(f(a, b) for a, b in kwds.items() if b is not None) - warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} " - "and will be removed in a future version, replace with " - "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})" - .format(klass=type(arg).__name__, pargs=pargs, - aargs=aargs, dispatch=dispatch, name=name), - FutureWarning, stacklevel=3) - - result = getattr(r, name)(*args, **kwds) - - if is_ndarray: - result = result.values - return result - - -def rolling_count(arg, window, **kwargs): - """ - Rolling count of number of non-NaN observations inside provided window. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - rolling_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', 'count', arg, window=window, **kwargs) - - -@Substitution("Unbiased moving covariance.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw + _ddof_kw, _flex_retval, - _roll_notes) -@Appender(_doc_template) -def rolling_cov(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'cov', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise', 'ddof'], - **kwargs) - - -@Substitution("Moving sample correlation.", _binary_arg_flex, - _roll_kw % 'None' + _pairwise_kw, _flex_retval, _roll_notes) -@Appender(_doc_template) -def rolling_corr(arg1, arg2=None, window=None, pairwise=None, **kwargs): - if window is None and isinstance(arg2, (int, float)): - window = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - elif arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise # only default unset - return ensure_compat('rolling', - 'corr', - arg1, - other=arg2, - window=window, - pairwise=pairwise, - func_kw=['other', 'pairwise'], - **kwargs) - - -# ----------------------------------------------------------------------------- -# Exponential moving moments - - -@Substitution("Exponentially-weighted moving average", _unary_arg, _ewm_kw, - _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewma(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - freq=None, adjust=True, how=None, ignore_na=False): - return ensure_compat('ewm', - 'mean', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na) - - -@Substitution("Exponentially-weighted moving variance", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmvar(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'var', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - - -@Substitution("Exponentially-weighted moving std", _unary_arg, - _ewm_kw + _bias_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmstd(arg, com=None, span=None, halflife=None, alpha=None, min_periods=0, - bias=False, freq=None, how=None, ignore_na=False, adjust=True): - return ensure_compat('ewm', - 'std', - arg, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - adjust=adjust, - how=how, - ignore_na=ignore_na, - bias=bias, - func_kw=['bias']) - - -ewmvol = ewmstd - - -@Substitution("Exponentially-weighted moving covariance", _binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, bias=False, freq=None, pairwise=None, how=None, - ignore_na=False, adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - - return ensure_compat('ewm', - 'cov', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - bias=bias, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 'pairwise', 'bias']) - - -@Substitution("Exponentially-weighted moving correlation", _binary_arg_flex, - _ewm_kw + _pairwise_kw, _type_of_input_retval, _ewm_notes) -@Appender(_doc_template) -def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, alpha=None, - min_periods=0, freq=None, pairwise=None, how=None, ignore_na=False, - adjust=True): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and com is None: - com = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('ewm', - 'corr', - arg1, - other=arg2, - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - freq=freq, - how=how, - ignore_na=ignore_na, - adjust=adjust, - pairwise=pairwise, - func_kw=['other', 'pairwise']) - -# --------------------------------------------------------------------- -# Python interface to Cython functions - - -def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): - if how is None: - how_arg_str = 'None' - else: - how_arg_str = "'{how}".format(how=how) - - @Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw, - _type_of_input_retval, _roll_notes) - @Appender(_doc_template) - def f(arg, window, min_periods=None, freq=None, center=False, - **kwargs): - - return ensure_compat('rolling', - name, - arg, - window=window, - min_periods=min_periods, - freq=freq, - center=center, - func_kw=func_kw, - **kwargs) - return f - - -rolling_max = _rolling_func('max', 'Moving maximum.', how='max') -rolling_min = _rolling_func('min', 'Moving minimum.', how='min') -rolling_sum = _rolling_func('sum', 'Moving sum.') -rolling_mean = _rolling_func('mean', 'Moving mean.') -rolling_median = _rolling_func('median', 'Moving median.', how='median') -rolling_std = _rolling_func('std', 'Moving standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_var = _rolling_func('var', 'Moving variance.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -rolling_skew = _rolling_func('skew', 'Unbiased moving skewness.') -rolling_kurt = _rolling_func('kurt', 'Unbiased moving kurtosis.') - - -def rolling_quantile(arg, window, quantile, min_periods=None, freq=None, - center=False): - """Moving quantile. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'quantile', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -def rolling_apply(arg, window, func, min_periods=None, freq=None, - center=False, args=(), kwargs={}): - """Generic moving function application. - - Parameters - ---------- - arg : Series, DataFrame - window : int - Size of the moving window. This is the number of observations used for - calculating the statistic. - func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('rolling', - 'apply', - arg, - window=window, - freq=freq, - center=center, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) - - -def rolling_window(arg, window=None, win_type=None, min_periods=None, - freq=None, center=False, mean=True, - axis=0, how=None, **kwargs): - """ - Applies a moving window of type ``window_type`` and size ``window`` - on the data. - - Parameters - ---------- - arg : Series, DataFrame - window : int or ndarray - Weighting window specification. If the window is an integer, then it is - treated as the window length and win_type is required - win_type : str, default None - Window type (see Notes) - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - center : boolean, default False - Whether the label should correspond with center of window - mean : boolean, default True - If True computes weighted mean, else weighted sum - axis : {0, 1}, default 0 - how : string, default 'mean' - Method for down- or re-sampling - - Returns - ------- - y : type of input argument - - Notes - ----- - The recognized window types are: - - * ``boxcar`` - * ``triang`` - * ``blackman`` - * ``hamming`` - * ``bartlett`` - * ``parzen`` - * ``bohman`` - * ``blackmanharris`` - * ``nuttall`` - * ``barthann`` - * ``kaiser`` (needs beta) - * ``gaussian`` (needs std) - * ``general_gaussian`` (needs power, width) - * ``slepian`` (needs width). - - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - func = 'mean' if mean else 'sum' - return ensure_compat('rolling', - func, - arg, - window=window, - win_type=win_type, - freq=freq, - center=center, - min_periods=min_periods, - axis=axis, - func_kw=kwargs.keys(), - **kwargs) - - -def _expanding_func(name, desc, func_kw=None, additional_kw=''): - @Substitution(desc, _unary_arg, _expanding_kw + additional_kw, - _type_of_input_retval, "") - @Appender(_doc_template) - def f(arg, min_periods=1, freq=None, **kwargs): - return ensure_compat('expanding', - name, - arg, - min_periods=min_periods, - func_kw=func_kw, - **kwargs) - return f - - -expanding_max = _expanding_func('max', 'Expanding maximum.') -expanding_min = _expanding_func('min', 'Expanding minimum.') -expanding_sum = _expanding_func('sum', 'Expanding sum.') -expanding_mean = _expanding_func('mean', 'Expanding mean.') -expanding_median = _expanding_func('median', 'Expanding median.') - -expanding_std = _expanding_func('std', 'Expanding standard deviation.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_var = _expanding_func('var', 'Expanding variance.', - func_kw=['ddof'], - additional_kw=_ddof_kw) -expanding_skew = _expanding_func('skew', 'Unbiased expanding skewness.') -expanding_kurt = _expanding_func('kurt', 'Unbiased expanding kurtosis.') - - -def expanding_count(arg, freq=None): - """ - Expanding count of number of non-NaN observations. - - Parameters - ---------- - arg : DataFrame or numpy ndarray-like - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - expanding_count : type of caller - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', 'count', arg, freq=freq) - - -def expanding_quantile(arg, quantile, min_periods=1, freq=None): - """Expanding quantile. - - Parameters - ---------- - arg : Series, DataFrame - quantile : float - 0 <= quantile <= 1 - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', - 'quantile', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['quantile'], - quantile=quantile) - - -@Substitution("Unbiased expanding covariance.", _binary_arg_flex, - _expanding_kw + _pairwise_kw + _ddof_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, - pairwise=None, ddof=1): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'cov', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - ddof=ddof, - func_kw=['other', 'pairwise', 'ddof']) - - -@Substitution("Expanding sample correlation.", _binary_arg_flex, - _expanding_kw + _pairwise_kw, _flex_retval, "") -@Appender(_doc_template) -def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): - if arg2 is None: - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - elif isinstance(arg2, (int, float)) and min_periods is None: - min_periods = arg2 - arg2 = arg1 - pairwise = True if pairwise is None else pairwise - return ensure_compat('expanding', - 'corr', - arg1, - other=arg2, - min_periods=min_periods, - pairwise=pairwise, - freq=freq, - func_kw=['other', 'pairwise', 'ddof']) - - -def expanding_apply(arg, func, min_periods=1, freq=None, - args=(), kwargs={}): - """Generic expanding function application. - - Parameters - ---------- - arg : Series, DataFrame - func : function - Must produce a single value from an ndarray input - min_periods : int, default None - Minimum number of observations in window required to have a value - (otherwise result is NA). - freq : string or DateOffset object, optional (default None) - Frequency to conform the data to before computing the - statistic. Specified as a frequency string or DateOffset object. - args : tuple - Passed on to func - kwargs : dict - Passed on to func - - Returns - ------- - y : type of input argument - - Notes - ----- - The `freq` keyword is used to conform time series data to a specified - frequency by resampling the data. This is done with the default parameters - of :meth:`~pandas.Series.resample` (i.e. using the `mean`). - - To learn more about the frequency strings, please see `this link - `__. - """ - return ensure_compat('expanding', - 'apply', - arg, - freq=freq, - min_periods=min_periods, - func_kw=['func', 'args', 'kwargs'], - func=func, - args=args, - kwargs=kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c20767b09178c..ea6c250420b13 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -35,8 +35,7 @@ class TestPDApi(Base): 'util', 'options', 'io'] # these are already deprecated; awaiting removal - deprecated_modules = ['stats', 'datetools', 'parser', - 'json', 'lib', 'tslib'] + deprecated_modules = ['datetools', 'parser', 'json', 'lib', 'tslib'] # misc misc = ['IndexSlice', 'NaT'] @@ -91,19 +90,7 @@ class TestPDApi(Base): deprecated_funcs_in_future = [] # these are already deprecated; awaiting removal - deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', - 'ewmvol', 'expanding_apply', 'expanding_corr', - 'expanding_count', 'expanding_cov', 'expanding_kurt', - 'expanding_max', 'expanding_mean', 'expanding_median', - 'expanding_min', 'expanding_quantile', - 'expanding_skew', 'expanding_std', 'expanding_sum', - 'expanding_var', 'rolling_apply', - 'rolling_corr', 'rolling_count', 'rolling_cov', - 'rolling_kurt', 'rolling_max', 'rolling_mean', - 'rolling_median', 'rolling_min', 'rolling_quantile', - 'rolling_skew', 'rolling_std', 'rolling_sum', - 'rolling_var', 'rolling_window', - 'pnow', 'match', 'groupby', 'get_store', + deprecated_funcs = ['pnow', 'match', 'groupby', 'get_store', 'plot_params', 'scatter_matrix'] def test_api(self): diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6f9e872526d0a..22526d14a7168 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,6 +1,5 @@ from itertools import product import pytest -import sys import warnings from warnings import catch_warnings @@ -9,16 +8,15 @@ import numpy as np import pandas as pd -from pandas import (Series, DataFrame, bdate_range, isna, - notna, concat, Timestamp, Index) -import pandas.stats.moments as mom +from pandas import (Series, DataFrame, bdate_range, + isna, notna, concat, Timestamp, Index) import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError from pandas.errors import UnsupportedFunctionCall import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.compat import range, zip, PY3 +from pandas.compat import range, zip N, K = 100, 10 @@ -610,19 +608,6 @@ def test_numpy_compat(self): getattr(e, func), dtype=np.float64) -class TestDeprecations(Base): - """ test that we are catching deprecation warnings """ - - def setup_method(self, method): - self._create_data() - - def test_deprecations(self): - - with catch_warnings(record=True): - mom.rolling_mean(np.ones(10), 3, center=True, axis=0) - mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) - - # gh-12373 : rolling functions error on float32 data # make sure rolling functions works for different dtypes # @@ -863,72 +848,55 @@ def test_centered_axis_validation(self): .rolling(window=3, center=True, axis=2).mean()) def test_rolling_sum(self): - self._check_moment_func(mom.rolling_sum, np.nansum, name='sum', + self._check_moment_func(np.nansum, name='sum', zero_min_periods_equal=False) def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func(mom.rolling_count, counter, name='count', - has_min_periods=False, preserve_nan=False, + self._check_moment_func(counter, name='count', has_min_periods=False, fill_value=0) def test_rolling_mean(self): - self._check_moment_func(mom.rolling_mean, np.mean, name='mean') + self._check_moment_func(np.mean, name='mean') @td.skip_if_no_scipy def test_cmov_mean(self): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with catch_warnings(record=True): - rs = mom.rolling_mean(vals, 5, center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) @td.skip_if_no_scipy def test_cmov_window(self): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, - 12.952, np.nan, np.nan]) - - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - tm.assert_almost_equal(xp, rs) - - xp = Series(rs) - rs = Series(vals).rolling(5, win_type='boxcar', center=True).mean() - tm.assert_series_equal(xp, rs) + result = Series(vals).rolling(5, win_type='boxcar', center=True).mean() + expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, + 12.818, 12.952, np.nan, np.nan]) + tm.assert_series_equal(expected, result) @td.skip_if_no_scipy def test_cmov_window_corner(self): # GH 8238 # all nan - vals = np.empty(10, dtype=float) - vals.fill(np.nan) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert np.isnan(rs).all() + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type='boxcar').mean() + assert np.isnan(result).all() # empty - vals = np.array([]) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 5, 'boxcar', center=True) - assert len(rs) == 0 + vals = pd.Series([]) + result = vals.rolling(5, center=True, win_type='boxcar').mean() + assert len(result) == 0 # shorter than window - vals = np.random.randn(5) - with catch_warnings(record=True): - rs = mom.rolling_window(vals, 10, 'boxcar') - assert np.isnan(rs).all() - assert len(rs) == 5 + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type='boxcar').mean() + assert np.isnan(result).all() + assert len(result) == 5 @td.skip_if_no_scipy def test_cmov_window_frame(self): @@ -1097,38 +1065,31 @@ def test_cmov_window_special_linear_range(self): tm.assert_series_equal(xp, rs) def test_rolling_median(self): - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_median, np.median, - name='median') + self._check_moment_func(np.median, name='median') def test_rolling_min(self): + self._check_moment_func(np.min, name='min') - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_min, np.min, name='min') - - with catch_warnings(record=True): - a = np.array([1, 2, 3, 4, 5]) - b = mom.rolling_min(a, window=100, min_periods=1) - tm.assert_almost_equal(b, np.ones(len(a))) + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) - pytest.raises(ValueError, mom.rolling_min, np.array([1, 2, 3]), - window=3, min_periods=5) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() def test_rolling_max(self): + self._check_moment_func(np.max, name='max') - with catch_warnings(record=True): - self._check_moment_func(mom.rolling_max, np.max, name='max') + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, min_periods=1).max() + tm.assert_almost_equal(a, b) - with catch_warnings(record=True): - a = np.array([1, 2, 3, 4, 5], dtype=np.float64) - b = mom.rolling_max(a, window=100, min_periods=1) - tm.assert_almost_equal(a, b) - - pytest.raises(ValueError, mom.rolling_max, np.array([1, 2, 3]), - window=3, min_periods=5) + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() - def test_rolling_quantile(self): - qs = [0.0, .1, .5, .9, 1.0] + @pytest.mark.parametrize('q', [0.0, .1, .5, .9, 1.0]) + def test_rolling_quantile(self, q): def scoreatpercentile(a, per): values = np.sort(a, axis=0) @@ -1147,18 +1108,11 @@ def scoreatpercentile(a, per): return retval - for q in qs: - - def f(x, window, quantile, min_periods=None, freq=None, - center=False): - return mom.rolling_quantile(x, window, quantile, - min_periods=min_periods, freq=freq, - center=center) + def quantile_func(x): + return scoreatpercentile(x, q) - def alt(x): - return scoreatpercentile(x, q) - - self._check_moment_func(f, alt, name='quantile', quantile=q) + self._check_moment_func(quantile_func, name='quantile', + quantile=q) def test_rolling_quantile_np_percentile(self): # #9413: Tests that rolling window's quantile default behavior @@ -1207,15 +1161,10 @@ def test_rolling_apply(self): tm.assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) - f = lambda x: x[np.isfinite(x)].mean() - - def roll_mean(x, window, min_periods=None, freq=None, center=False, - **kwargs): - return mom.rolling_apply(x, window, func=f, - min_periods=min_periods, freq=freq, - center=center) + def f(x): + return x[np.isfinite(x)].mean() - self._check_moment_func(roll_mean, np.mean, name='apply', func=f) + self._check_moment_func(np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) @@ -1228,39 +1177,34 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False, def test_rolling_apply_out_of_bounds(self): # #1850 - arr = np.arange(4) + vals = pd.Series([1, 2, 3, 4]) - # it works! - with catch_warnings(record=True): - result = mom.rolling_apply(arr, 10, np.sum) - assert isna(result).all() + result = vals.rolling(10).apply(np.sum) + assert result.isna().all() - with catch_warnings(record=True): - result = mom.rolling_apply(arr, 10, np.sum, min_periods=1) - tm.assert_almost_equal(result, result) + result = vals.rolling(10, min_periods=1).apply(np.sum) + expected = pd.Series([1, 3, 6, 10], dtype=float) + tm.assert_almost_equal(result, expected) def test_rolling_std(self): - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=1), + self._check_moment_func(lambda x: np.std(x, ddof=1), name='std') - self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=0), + self._check_moment_func(lambda x: np.std(x, ddof=0), name='std', ddof=0) def test_rolling_std_1obs(self): - with catch_warnings(record=True): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1) - expected = np.array([np.nan] * 5) - tm.assert_almost_equal(result, expected) + vals = pd.Series([1., 2., 3., 4., 5.]) - with catch_warnings(record=True): - result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), - 1, min_periods=1, ddof=0) - expected = np.zeros(5) - tm.assert_almost_equal(result, expected) + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) - with catch_warnings(record=True): - result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), - 3, min_periods=2) + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.] * 5) + tm.assert_series_equal(result, expected) + + result = (pd.Series([np.nan, np.nan, 3, 4, 5]) + .rolling(3, min_periods=2).std()) assert np.isnan(result[2]) def test_rolling_std_neg_sqrt(self): @@ -1268,208 +1212,53 @@ def test_rolling_std_neg_sqrt(self): # Test move_nanstd for neg sqrt. - a = np.array([0.0011448196318903589, 0.00028718669878572767, - 0.00028718669878572767, 0.00028718669878572767, - 0.00028718669878572767]) - with catch_warnings(record=True): - b = mom.rolling_std(a, window=3) + a = pd.Series([0.0011448196318903589, 0.00028718669878572767, + 0.00028718669878572767, 0.00028718669878572767, + 0.00028718669878572767]) + b = a.rolling(window=3).std() assert np.isfinite(b[2:]).all() - with catch_warnings(record=True): - b = mom.ewmstd(a, span=3) + b = a.ewm(span=3).std() assert np.isfinite(b[2:]).all() def test_rolling_var(self): - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), - test_stable=True, name='var') - self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=0), + self._check_moment_func(lambda x: np.var(x, ddof=1), + name='var') + self._check_moment_func(lambda x: np.var(x, ddof=0), name='var', ddof=0) @td.skip_if_no_scipy def test_rolling_skew(self): from scipy.stats import skew - self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False), name='skew') + self._check_moment_func(lambda x: skew(x, bias=False), name='skew') @td.skip_if_no_scipy def test_rolling_kurt(self): from scipy.stats import kurtosis - self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False), name='kurt') - - def test_fperr_robustness(self): - # TODO: remove this once python 2.5 out of picture - if PY3: - pytest.skip("doesn't work on python 3") - - # #2114 - data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa - - arr = np.frombuffer(data, dtype='= 0).all() - - with catch_warnings(record=True): - result = mom.rolling_mean(arr, 2) - assert (result[1:] >= 0).all() - - with catch_warnings(record=True): - result = mom.rolling_var(arr, 2) - assert (result[1:] >= 0).all() + self._check_moment_func(lambda x: kurtosis(x, bias=False), + name='kurt') - # #2527, ugh - arr = np.array([0.00012456, 0.0003, 0]) - with catch_warnings(record=True): - result = mom.rolling_mean(arr, 1) - assert result[-1] >= 0 - - with catch_warnings(record=True): - result = mom.rolling_mean(-arr, 1) - assert result[-1] <= 0 - - def _check_moment_func(self, f, static_comp, name=None, window=50, - has_min_periods=True, has_center=True, - has_time_rule=True, preserve_nan=True, - fill_value=None, test_stable=False, - zero_min_periods_equal=True, + def _check_moment_func(self, static_comp, name, has_min_periods=True, + has_center=True, has_time_rule=True, + fill_value=None, zero_min_periods_equal=True, **kwargs): - with warnings.catch_warnings(record=True): - self._check_ndarray(f, static_comp, window=window, - has_min_periods=has_min_periods, - preserve_nan=preserve_nan, - has_center=has_center, fill_value=fill_value, - test_stable=test_stable, - zero_min_periods_equal=zero_min_periods_equal, - **kwargs) - - with warnings.catch_warnings(record=True): - self._check_structures(f, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - # new API - if name is not None: - self._check_structures(f, static_comp, name=name, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - fill_value=fill_value, - has_center=has_center, **kwargs) - - def _check_ndarray(self, f, static_comp, window=50, has_min_periods=True, - preserve_nan=True, has_center=True, fill_value=None, - test_stable=False, test_window=True, - zero_min_periods_equal=True, **kwargs): - def get_result(arr, window, min_periods=None, center=False): - return f(arr, window, min_periods=min_periods, center=center, ** - kwargs) - - result = get_result(self.arr, window) - tm.assert_almost_equal(result[-1], static_comp(self.arr[-50:])) - - if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) - - # excluding NaNs correctly - arr = randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - - if has_min_periods: - result = get_result(arr, 50, min_periods=30) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # min_periods is working correctly - result = get_result(arr, 20, min_periods=15) - assert np.isnan(result[23]) - assert not np.isnan(result[24]) - - assert not np.isnan(result[-6]) - assert np.isnan(result[-5]) - - arr2 = randn(20) - result = get_result(arr2, 10, min_periods=5) - assert isna(result[3]) - assert notna(result[4]) - - if zero_min_periods_equal: - # min_periods=0 may be equivalent to min_periods=1 - result0 = get_result(arr, 20, min_periods=0) - result1 = get_result(arr, 20, min_periods=1) - tm.assert_almost_equal(result0, result1) - else: - result = get_result(arr, 50) - tm.assert_almost_equal(result[-1], static_comp(arr[10:-10])) - - # GH 7925 - if has_center: - if has_min_periods: - result = get_result(arr, 20, min_periods=15, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20, - min_periods=15)[9:] - else: - result = get_result(arr, 20, center=True) - expected = get_result( - np.concatenate((arr, np.array([np.NaN] * 9))), 20)[9:] - - tm.assert_numpy_array_equal(result, expected) - - if test_stable: - result = get_result(self.arr + 1e9, window) - tm.assert_almost_equal(result[-1], - static_comp(self.arr[-50:] + 1e9)) - - # Test window larger than array, #7297 - if test_window: - if has_min_periods: - for minp in (0, len(self.arr) - 1, len(self.arr)): - result = get_result(self.arr, len(self.arr) + 1, - min_periods=minp) - expected = get_result(self.arr, len(self.arr), - min_periods=minp) - nan_mask = np.isnan(result) - tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], - expected[nan_mask]) - else: - result = get_result(self.arr, len(self.arr) + 1) - expected = get_result(self.arr, len(self.arr)) - nan_mask = np.isnan(result) - tm.assert_numpy_array_equal(nan_mask, np.isnan(expected)) - - nan_mask = ~nan_mask - tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) - - def _check_structures(self, f, static_comp, name=None, - has_min_periods=True, has_time_rule=True, - has_center=True, fill_value=None, **kwargs): def get_result(obj, window, min_periods=None, center=False): - - # check via the API calls if name is provided - if name is not None: - r = obj.rolling(window=window, min_periods=min_periods, - center=center) - return getattr(r, name)(**kwargs) - - # check via the moments API - with catch_warnings(record=True): - return f(obj, window=window, min_periods=min_periods, - center=center, **kwargs) + r = obj.rolling(window=window, min_periods=min_periods, + center=center) + return getattr(r, name)(**kwargs) series_result = get_result(self.series, window=50) - frame_result = get_result(self.frame, window=50) - assert isinstance(series_result, Series) - assert type(frame_result) == DataFrame + tm.assert_almost_equal(series_result.iloc[-1], + static_comp(self.series[-50:])) + + frame_result = get_result(self.frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal(frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, + axis=0), + check_names=False) # check time_rule works if has_time_rule: @@ -1500,8 +1289,72 @@ def get_result(obj, window, min_periods=None, center=False): trunc_frame.apply(static_comp), check_names=False) - # GH 7925 + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(self.series) - 1, len(self.series)): + result = get_result(self.series, len(self.series) + 1, + min_periods=minp) + expected = get_result(self.series, len(self.series), + min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], + expected[nan_mask]) + else: + result = get_result(self.series, len(self.series) + 1) + expected = get_result(self.series, len(self.series)) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + # check center=True if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, + min_periods=15)[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), + 20)[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) # shifter index s = ['x%d' % x for x in range(12)] @@ -1541,12 +1394,11 @@ def get_result(obj, window, min_periods=None, center=False): tm.assert_frame_equal(frame_xp, frame_rs) def test_ewma(self): - self._check_ew(mom.ewma, name='mean') + self._check_ew(name='mean') - arr = np.zeros(1000) - arr[5] = 1 - with catch_warnings(record=True): - result = mom.ewma(arr, span=100, adjust=False).sum() + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() assert np.abs(result - 1) < 1e-2 s = Series([1.0, 2.0, 4.0, 8.0]) @@ -1626,55 +1478,34 @@ def simple_wma(s, w): tm.assert_series_equal(result, expected) def test_ewmvar(self): - self._check_ew(mom.ewmvar, name='var') + self._check_ew(name='var') def test_ewmvol(self): - self._check_ew(mom.ewmvol, name='vol') + self._check_ew(name='vol') def test_ewma_span_com_args(self): - with catch_warnings(record=True): - A = mom.ewma(self.arr, com=9.5) - B = mom.ewma(self.arr, span=20) - tm.assert_almost_equal(A, B) + A = self.series.ewm(com=9.5).mean() + B = self.series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20) - pytest.raises(ValueError, mom.ewma, self.arr) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + self.series.ewm().mean() def test_ewma_halflife_arg(self): - with catch_warnings(record=True): - A = mom.ewma(self.arr, com=13.932726172912965) - B = mom.ewma(self.arr, halflife=10.0) - tm.assert_almost_equal(A, B) - - pytest.raises(ValueError, mom.ewma, self.arr, span=20, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr, com=9.5, span=20, - halflife=50) - pytest.raises(ValueError, mom.ewma, self.arr) - - def test_ewma_alpha_old_api(self): - # GH 10789 - with catch_warnings(record=True): - a = mom.ewma(self.arr, alpha=0.61722699889169674) - b = mom.ewma(self.arr, com=0.62014947789973052) - c = mom.ewma(self.arr, span=2.240298955799461) - d = mom.ewma(self.arr, halflife=0.721792864318) - tm.assert_numpy_array_equal(a, b) - tm.assert_numpy_array_equal(a, c) - tm.assert_numpy_array_equal(a, d) - - def test_ewma_alpha_arg_old_api(self): - # GH 10789 - with catch_warnings(record=True): - pytest.raises(ValueError, mom.ewma, self.arr) - pytest.raises(ValueError, mom.ewma, self.arr, - com=10.0, alpha=0.5) - pytest.raises(ValueError, mom.ewma, self.arr, - span=10.0, alpha=0.5) - pytest.raises(ValueError, mom.ewma, self.arr, - halflife=10.0, alpha=0.5) + A = self.series.ewm(com=13.932726172912965).mean() + B = self.series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm() def test_ewm_alpha(self): # GH 10789 @@ -1689,11 +1520,15 @@ def test_ewm_alpha(self): def test_ewm_alpha_arg(self): # GH 10789 - s = Series(self.arr) - pytest.raises(ValueError, s.ewm) - pytest.raises(ValueError, s.ewm, com=10.0, alpha=0.5) - pytest.raises(ValueError, s.ewm, span=10.0, alpha=0.5) - pytest.raises(ValueError, s.ewm, halflife=10.0, alpha=0.5) + s = self.series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) def test_ewm_domain_checks(self): # GH 12492 @@ -1719,24 +1554,25 @@ def test_ewm_domain_checks(self): s.ewm(alpha=1.0) pytest.raises(ValueError, s.ewm, alpha=1.1) - def test_ew_empty_arrays(self): - arr = np.array([], dtype=np.float64) + def test_ew_empty_series(self): + vals = pd.Series([], dtype=np.float64) - funcs = [mom.ewma, mom.ewmvol, mom.ewmvar] + ewm = vals.ewm(3) + funcs = ['mean', 'vol', 'var'] for f in funcs: - with catch_warnings(record=True): - result = f(arr, 3) - tm.assert_almost_equal(result, arr) + result = getattr(ewm, f)() + tm.assert_almost_equal(result, vals) - def _check_ew(self, func, name=None): - with catch_warnings(record=True): - self._check_ew_ndarray(func, name=name) - self._check_ew_structures(func, name=name) + def _check_ew(self, name=None, preserve_nan=False): + series_result = getattr(self.series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(self.frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame - def _check_ew_ndarray(self, func, preserve_nan=False, name=None): - result = func(self.arr, com=10) + result = getattr(self.series.ewm(com=10), name)() if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result[self._nan_locs].isna().all() # excluding NaNs correctly arr = randn(50) @@ -1746,45 +1582,40 @@ def _check_ew_ndarray(self, func, preserve_nan=False, name=None): # check min_periods # GH 7898 - result = func(s, 50, min_periods=2) - assert np.isnan(result.values[:11]).all() - assert not np.isnan(result.values[11:]).any() + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() for min_periods in (0, 1): - result = func(s, 50, min_periods=min_periods) - if func == mom.ewma: - assert np.isnan(result.values[:10]).all() - assert not np.isnan(result.values[10:]).any() + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == 'mean': + assert result[:10].isna().all() + assert not result[10:].isna().any() else: - # ewmstd, ewmvol, ewmvar (with bias=False) require at least two - # values - assert np.isnan(result.values[:11]).all() - assert not np.isnan(result.values[11:]).any() + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() # check series of length 0 - result = func(Series([]), 50, min_periods=min_periods) - tm.assert_series_equal(result, Series([])) + result = getattr(Series().ewm(com=50, min_periods=min_periods), + name)() + tm.assert_series_equal(result, Series()) # check series of length 1 - result = func(Series([1.]), 50, min_periods=min_periods) - if func == mom.ewma: + result = getattr(Series([1.]).ewm(50, min_periods=min_periods), + name)() + if name == 'mean': tm.assert_series_equal(result, Series([1.])) else: - # ewmstd, ewmvol, ewmvar with bias=False require at least two - # values + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values tm.assert_series_equal(result, Series([np.NaN])) # pass in ints - result2 = func(np.arange(50), span=10) + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() assert result2.dtype == np.float_ - def _check_ew_structures(self, func, name): - series_result = getattr(self.series.ewm(com=10), name)() - assert isinstance(series_result, Series) - - frame_result = getattr(self.frame.ewm(com=10), name)() - assert type(frame_result) == DataFrame - class TestPairwise(object): @@ -2021,9 +1852,6 @@ class TestMomentsConsistency(Base): # lambda v: Series(v).skew(), 3, 'skew'), # (lambda v: Series(v).kurt(), 4, 'kurt'), - # (lambda x, min_periods: mom.expanding_quantile(x, 0.3, - # min_periods=min_periods, 'quantile'), - # restore once GH 8084 is fixed # lambda v: Series(v).quantile(0.3), None, 'quantile'), @@ -2585,22 +2413,6 @@ def func(A, B, com, **kwargs): pytest.raises(Exception, func, A, randn(50), 20, min_periods=5) - def test_expanding_apply(self): - ser = Series([]) - tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - - def expanding_mean(x, min_periods=1): - return mom.expanding_apply(x, lambda x: x.mean(), - min_periods=min_periods) - - self._check_expanding(expanding_mean, np.mean) - - # GH 8080 - s = Series([None, None, None]) - result = s.expanding(min_periods=0).apply(lambda x: len(x)) - expected = Series([1., 2., 3.]) - tm.assert_series_equal(result, expected) - def test_expanding_apply_args_kwargs(self): def mean_w_arg(x, const): return np.mean(x) + const @@ -2648,9 +2460,6 @@ def test_expanding_cov(self): tm.assert_almost_equal(rolling_result, result) - def test_expanding_max(self): - self._check_expanding(mom.expanding_max, np.max, preserve_nan=False) - def test_expanding_cov_pairwise(self): result = self.frame.expanding().corr() @@ -2980,55 +2789,73 @@ def test_rolling_kurt_eq_value_fperr(self): a = Series([1.1] * 15).rolling(window=10).kurt() assert np.isnan(a).all() - def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - result = func(self.arr) + @pytest.mark.parametrize('func,static_comp', [('sum', np.sum), + ('mean', np.mean), + ('max', np.max), + ('min', np.min)], + ids=['sum', 'mean', 'max', 'min']) + def test_expanding_func(self, func, static_comp): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, + center=center, axis=axis) + return getattr(exp, func)() + self._check_expanding(expanding_func, static_comp, preserve_nan=False) + + def test_expanding_apply(self): + + def expanding_mean(x, min_periods=1): + exp = x.expanding(min_periods=min_periods) + return exp.apply(lambda x: x.mean()) + + self._check_expanding(expanding_mean, np.mean) + + ser = Series([]) + tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) - tm.assert_almost_equal(result[10], static_comp(self.arr[:11])) + # GH 8080 + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x)) + expected = Series([1., 2., 3.]) + tm.assert_series_equal(result, expected) + + def _check_expanding(self, func, static_comp, has_min_periods=True, + has_time_rule=True, preserve_nan=True): + + series_result = func(self.series) + assert isinstance(series_result, Series) + frame_result = func(self.frame) + assert isinstance(frame_result, DataFrame) + + result = func(self.series) + tm.assert_almost_equal(result[10], static_comp(self.series[:11])) if preserve_nan: - assert (np.isnan(result[self._nan_locs]).all()) + assert result.iloc[self._nan_locs].isna().all() - arr = randn(50) + ser = Series(randn(50)) if has_min_periods: - result = func(arr, min_periods=30) - assert (np.isnan(result[:29]).all()) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) # min_periods is working correctly - result = func(arr, min_periods=15) - assert np.isnan(result[13]) - assert not np.isnan(result[14]) + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) - arr2 = randn(20) - result = func(arr2, min_periods=5) + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) assert isna(result[3]) assert notna(result[4]) # min_periods=0 - result0 = func(arr, min_periods=0) - result1 = func(arr, min_periods=1) + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) tm.assert_almost_equal(result0, result1) else: - result = func(arr) - tm.assert_almost_equal(result[-1], static_comp(arr[:50])) - - def _check_expanding_structures(self, func): - series_result = func(self.series) - assert isinstance(series_result, Series) - frame_result = func(self.frame) - assert type(frame_result) == DataFrame - - def _check_expanding(self, func, static_comp, has_min_periods=True, - has_time_rule=True, preserve_nan=True): - with warnings.catch_warnings(record=True): - self._check_expanding_ndarray(func, static_comp, - has_min_periods=has_min_periods, - has_time_rule=has_time_rule, - preserve_nan=preserve_nan) - with warnings.catch_warnings(record=True): - self._check_expanding_structures(func) + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) def test_rolling_max_gh6297(self): """Replicate result expected in GH #6297"""