diff --git a/doc/source/computation.rst b/doc/source/computation.rst index d5dcacf53ec23..b8559eb51ece8 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -413,6 +413,8 @@ columns using ``ix`` indexing: @savefig rolling_corr_pairwise_ex.png correls.ix[:, 'A', 'C'].plot() +.. _stats.moments.expanding: + Expanding window moment functions --------------------------------- A common alternative to rolling statistics is to use an *expanding* window, @@ -485,60 +487,79 @@ relative impact of an individual data point. As an example, here is the @savefig expanding_mean_frame.png expanding_mean(ts).plot(style='k') +.. _stats.moments.exponentially_weighted: + Exponentially weighted moment functions --------------------------------------- -A related set of functions are exponentially weighted versions of many of the -above statistics. A number of EW (exponentially weighted) functions are -provided using the blending method. For example, where :math:`y_t` is the -result and :math:`x_t` the input, we compute an exponentially weighted moving -average as +A related set of functions are exponentially weighted versions of several of +the above statistics. A number of expanding EW (exponentially weighted) +functions are provided: + +.. csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + + ``ewma``, EW moving average + ``ewmvar``, EW moving variance + ``ewmstd``, EW moving standard deviation + ``ewmcorr``, EW moving correlation + ``ewmcov``, EW moving covariance + +In general, a weighted moving average is calculated as .. math:: - y_t = (1 - \alpha) y_{t-1} + \alpha x_t + y_t = \frac{\sum_{i=0}^t w_i x_{t-i}}{\sum_{i=0}^t w_i}, -One must have :math:`0 < \alpha \leq 1`, but rather than pass :math:`\alpha` -directly, it's easier to think about either the **span**, **center of mass -(com)** or **halflife** of an EW moment: +where :math:`x_t` is the input and :math:`y_t` is the result. 
+ +The EW functions support two variants of exponential weights: +The default, ``adjust=True``, uses the weights :math:`w_i = (1 - \alpha)^i`. +When ``adjust=False`` is specified, moving averages are calculated as .. math:: - \alpha = - \begin{cases} - \frac{2}{s + 1}, s = \text{span}\\ - \frac{1}{1 + c}, c = \text{center of mass}\\ - 1 - \exp^{\frac{\log 0.5}{h}}, h = \text{half life} + y_0 &= x_0 \\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + +which is equivalent to using weights + +.. math:: + + w_i = \begin{cases} + \alpha (1 - \alpha)^i & \text{if } i < t \\ + (1 - \alpha)^i & \text{if } i = t. \end{cases} .. note:: - the equation above is sometimes written in the form + These equations are sometimes written in terms of :math:`\alpha' = 1 - \alpha`, e.g. + + .. math:: - .. math:: + y_t = \alpha' y_{t-1} + (1 - \alpha') x_t. - y_t = \alpha' y_{t-1} + (1 - \alpha') x_t +One must have :math:`0 < \alpha \leq 1`, but rather than pass :math:`\alpha` +directly, it's easier to think about either the **span**, **center of mass +(com)** or **halflife** of an EW moment: - where :math:`\alpha' = 1 - \alpha`. +.. math:: -You can pass one of the three to these functions but not more. **Span** + \alpha = + \begin{cases} + \frac{2}{s + 1}, & s = \text{span}\\ + \frac{1}{1 + c}, & c = \text{center of mass}\\ + 1 - \exp^{\frac{\log 0.5}{h}}, & h = \text{half life} + \end{cases} + +One must specify precisely one of the three to the EW functions. **Span** corresponds to what is commonly called a "20-day EW moving average" for example. **Center of mass** has a more physical interpretation. For example, **span** = 20 corresponds to **com** = 9.5. **Halflife** is the period of -time for the exponential weight to reduce to one half. Here is the list of -functions available: - -.. 
csv-table:: - :header: "Function", "Description" - :widths: 20, 80 - - ``ewma``, EW moving average - ``ewmvar``, EW moving variance - ``ewmstd``, EW moving standard deviation - ``ewmcorr``, EW moving correlation - ``ewmcov``, EW moving covariance +time for the exponential weight to reduce to one half. -Here are an example for a univariate time series: +Here is an example for a univariate time series: .. ipython:: python @@ -548,8 +569,45 @@ Here are an example for a univariate time series: @savefig ewma_ex.png ewma(ts, span=20).plot(style='k') -.. note:: +All the EW functions have a ``min_periods`` argument, which has the same +meaning it does for all the ``expanding_`` and ``rolling_`` functions: +no output values will be set until at least ``min_periods`` non-null values +are encountered in the (expanding) window. +(This is a change from versions prior to 0.15.0, in which the ``min_periods`` +argument affected only the ``min_periods`` consecutive entries starting at the +first non-null value.) + +All the EW functions also have an ``ignore_na`` argument, which determines how +intermediate null values affect the calculation of the weights. +When ``ignore_na=False`` (the default), weights are calculated based on absolute +positions, so that intermediate null values affect the result. +When ``ignore_na=True`` (which reproduces the behavior in versions prior to 0.15.0), +weights are calculated by ignoring intermediate null values. +For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted +average of ``3, NaN, 5`` would be calculated as + +.. math:: + + \frac{(1-\alpha)^2 \cdot 3 + 1 \cdot 5}{(1-\alpha)^2 + 1} + +Whereas if ``ignore_na=True``, the weighted average would be calculated as + +.. math:: + + \frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}. + +The ``ewmvar``, ``ewmstd``, and ``ewmcov`` functions have a ``bias`` argument, +specifying whether the result should contain biased or unbiased statistics. 
+For example, if ``bias=True``, ``ewmvar(x)`` is calculated as +``ewmvar(x) = ewma(x**2) - ewma(x)**2``; +whereas if ``bias=False`` (the default), the biased variance statistics +are scaled by debiasing factors + +.. math:: + + \frac{\left(\sum_{i=0}^t w_i\right)^2}{\left(\sum_{i=0}^t w_i\right)^2 - \sum_{i=0}^t w_i^2}. - The EW functions perform a standard adjustment to the initial observations - whereby if there are fewer observations than called for in the span, those - observations are reweighted accordingly. +(For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor, +with :math:`N = t + 1`.) +See http://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance +for further details. \ No newline at end of file diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 721d232a1931e..4789ac280b9d8 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -83,25 +83,8 @@ API changes rolling_min(s, window=10, min_periods=5) -- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov` - now have an optional ``ignore_na`` argument. - When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation. - When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. - (:issue:`7543`) - - .. ipython:: python - - ewma(Series([None, 1., 100.]), com=2.5) - ewma(Series([1., None, 100.]), com=2.5, ignore_na=True) # pre-0.15.0 behavior - ewma(Series([1., None, 100.]), com=2.5, ignore_na=False) # default - -- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcorr`, and :func:`ewmcov` - now set to ``NaN`` the first ``min_periods-1`` entries of the result (for ``min_periods>1``). - Previously the first ``min_periods`` entries of the result were set to ``NaN``. - The new behavior accords with the existing documentation. 
(:issue:`7884`) - - :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`, - :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, and :func:`rolling_quantile`, + :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`, :func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`, :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries. @@ -112,20 +95,19 @@ API changes .. code-block:: python - In [7]: rolling_sum(Series(range(5)), window=3, min_periods=0, center=True) + In [7]: rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) Out[7]: 0 1 1 3 2 6 - 3 9 - 4 NaN + 3 NaN dtype: float64 - - New behavior (note final value is ``7 = sum([3, 4, NaN])``): + + New behavior (note final value is ``5 = sum([2, 3, NaN])``): .. ipython:: python - rolling_sum(Series(range(5)), window=3, min_periods=0, center=True) + rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) - Removed ``center`` argument from :func:`expanding_max`, :func:`expanding_min`, :func:`expanding_sum`, :func:`expanding_mean`, :func:`expanding_median`, :func:`expanding_std`, :func:`expanding_var`, @@ -133,6 +115,55 @@ API changes :func:`expanding_cov`, :func:`expanding_corr`, :func:`expanding_corr_pairwise`, and :func:`expanding_apply`, as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now interpret ``min_periods`` in the same manner that the ``rolling_*`` and ``expanding_*`` functions do: + a given result entry will be ``NaN`` if the (expanding, in this case) window does not contain + at least ``min_periods`` values. 
The previous behavior was to set to ``NaN`` the ``min_periods`` entries + starting with the first non- ``NaN`` value. (:issue:`7977`) + + Prior behavior (note values start at index ``2``, which is ``min_periods`` after index ``0`` + (the index of the first non-empty value)): + + .. ipython:: python + + s = Series([1, None, None, None, 2, 3]) + + .. code-block:: python + + In [51]: ewma(s, com=3., min_periods=2) + Out[51]: + 0 NaN + 1 NaN + 2 1.000000 + 3 1.000000 + 4 1.571429 + 5 2.189189 + dtype: float64 + + New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): + + .. ipython:: python + + ewma(s, com=3., min_periods=2) + +- :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``adjust`` argument, just like :func:`ewma` does, + affecting how the weights are calculated. + The default value of ``adjust`` is ``True``, which is backwards-compatible. + See :ref:`Exponentially weighted moment functions <stats.moments.exponentially_weighted>` for details. (:issue:`7911`) + +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``ignore_na`` argument. + When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation. + When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. + (:issue:`7543`) + + .. ipython:: python + + ewma(Series([None, 1., 8.]), com=2.) + ewma(Series([1., None, 8.]), com=2., ignore_na=True) # pre-0.15.0 behavior + ewma(Series([1., None, 8.]), com=2., ignore_na=False) # new default + - Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`) In prior versions this would drop the timezone. 
@@ -580,12 +611,61 @@ Bug Fixes - Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`) - Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`) - Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`) -- Bug in ``expanding_cov``, ``expanding_corr``, ``rolling_cov``, ``rolling_cov``, ``ewmcov``, and ``ewmcorr`` + +- Bug in :func:`expanding_cov`, :func:`expanding_corr`, :func:`rolling_cov`, :func:`rolling_corr`, :func:`ewmcov`, and :func:`ewmcorr` returning results with columns sorted by name and producing an error for non-unique columns; now handles non-unique columns and returns columns in original order (except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`) - Bug in :func:`rolling_count` and ``expanding_*`` functions unnecessarily producing error message for zero-length data (:issue:`8056`) - Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`) +- Bug in :func:`expanding_std` and :func:`expanding_var` for a single value producing a confusing error message (:issue:`7900`) +- Bug in :func:`rolling_std` and :func:`rolling_var` for a single value producing ``0`` rather than ``NaN`` (:issue:`7900`) + +- Bug in :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, and :func:`ewmcov` + calculation of de-biasing factors when ``bias=False`` (the default). + Previously an incorrect constant factor was used, based on ``adjust=True``, ``ignore_na=True``, + and an infinite number of observations. + Now a different factor is used for each entry, based on the actual weights + (analogous to the usual ``N/(N-1)`` factor). 
+ In particular, for a single point a value of ``NaN`` is returned when ``bias=False``, + whereas previously a value of (approximately) ``0`` was returned. + + For example, consider the following pre-0.15.0 results for ``ewmvar(..., bias=False)``, + and the corresponding debiasing factors: + + .. ipython:: python + + s = Series([1., 2., 0., 4.]) + + .. code-block:: python + + In [69]: ewmvar(s, com=2., bias=False) + Out[69]: + 0 -2.775558e-16 + 1 3.000000e-01 + 2 9.556787e-01 + 3 3.585799e+00 + dtype: float64 + + In [70]: ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True) + Out[70]: + 0 1.25 + 1 1.25 + 2 1.25 + 3 1.25 + dtype: float64 + + Note that entry ``0`` is approximately 0, and the debiasing factors are a constant 1.25. + By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, + and the debiasing factors are decreasing (towards 1.25): + + .. ipython:: python + + ewmvar(s, com=2., bias=False) + ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True) + + See :ref:`Exponentially weighted moment functions <stats.moments.exponentially_weighted>` for details. (:issue:`7912`) + - Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index c0f0590c22a25..77d8cea4de507 100644 --- a/pandas/algos.pyx +++ b/pandas/algos.pyx @@ -977,7 +977,7 @@ def roll_mean(ndarray[double_t] input, #------------------------------------------------------------------------------- # Exponentially weighted moving average -def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na): +def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na, int minp): ''' Compute exponentially-weighted moving average using center-of-mass. 
@@ -987,45 +987,146 @@ def ewma(ndarray[double_t] input, double_t com, int adjust, int ignore_na): com : float64 adjust: int ignore_na: int + minp: int Returns ------- y : ndarray ''' - cdef double cur, prev, neww, oldw, adj - cdef Py_ssize_t i cdef Py_ssize_t N = len(input) - cdef ndarray[double_t] output = np.empty(N, dtype=float) - if N == 0: return output + minp = max(minp, 1) + + cdef double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + cdef Py_ssize_t i, nobs + alpha = 1. / (1. + com) old_wt_factor = 1. - alpha - new_wt = 1.0 if adjust else alpha + new_wt = 1. if adjust else alpha - output[0] = input[0] - weighted_avg = output[0] + weighted_avg = input[0] + is_observation = (weighted_avg == weighted_avg) + nobs = int(is_observation) + output[0] = weighted_avg if (nobs >= minp) else NaN old_wt = 1. for i from 1 <= i < N: cur = input[i] + is_observation = (cur == cur) + nobs += int(is_observation) if weighted_avg == weighted_avg: - if cur == cur: - old_wt *= old_wt_factor - weighted_avg = ((old_wt * weighted_avg) + (new_wt * cur)) / (old_wt + new_wt) - if adjust: - old_wt += new_wt - else: - old_wt = 1. - elif not ignore_na: + if is_observation or (not ignore_na): old_wt *= old_wt_factor - else: + if is_observation: + if weighted_avg != cur: # avoid numerical errors on constant series + weighted_avg = ((old_wt * weighted_avg) + (new_wt * cur)) / (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1. + elif is_observation: weighted_avg = cur - output[i] = weighted_avg + output[i] = weighted_avg if (nobs >= minp) else NaN + + return output + +#------------------------------------------------------------------------------- +# Exponentially weighted moving covariance + +def ewmcov(ndarray[double_t] input_x, ndarray[double_t] input_y, + double_t com, int adjust, int ignore_na, int minp, int bias): + ''' + Compute exponentially-weighted moving covariance using center-of-mass. 
+ + Parameters + ---------- + input_x : ndarray (float64 type) + input_y : ndarray (float64 type) + com : float64 + adjust: int + ignore_na: int + minp: int + bias: int + + Returns + ------- + y : ndarray + ''' + + cdef Py_ssize_t N = len(input_x) + if len(input_y) != N: + raise ValueError('arrays are of different lengths (%d and %d)' % (N, len(input_y))) + cdef ndarray[double_t] output = np.empty(N, dtype=float) + if N == 0: + return output + + minp = max(minp, 1) + + cdef double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + cdef double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + cdef Py_ssize_t i, nobs + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + mean_x = input_x[0] + mean_y = input_y[0] + is_observation = ((mean_x == mean_x) and (mean_y == mean_y)) + nobs = int(is_observation) + if not is_observation: + mean_x = NaN + mean_y = NaN + output[0] = (0. if bias else NaN) if (nobs >= minp) else NaN + cov = 0. + sum_wt = 1. + sum_wt2 = 1. + old_wt = 1. 
+ + for i from 1 <= i < N: + cur_x = input_x[i] + cur_y = input_y[i] + is_observation = ((cur_x == cur_x) and (cur_y == cur_y)) + nobs += int(is_observation) + if mean_x == mean_x: + if is_observation or (not ignore_na): + sum_wt *= old_wt_factor + sum_wt2 *= (old_wt_factor * old_wt_factor) + old_wt *= old_wt_factor + if is_observation: + old_mean_x = mean_x + old_mean_y = mean_y + if mean_x != cur_x: # avoid numerical errors on constant series + mean_x = ((old_wt * old_mean_x) + (new_wt * cur_x)) / (old_wt + new_wt) + if mean_y != cur_y: # avoid numerical errors on constant series + mean_y = ((old_wt * old_mean_y) + (new_wt * cur_y)) / (old_wt + new_wt) + cov = ((old_wt * (cov + ((old_mean_x - mean_x) * (old_mean_y - mean_y)))) + + (new_wt * ((cur_x - mean_x) * (cur_y - mean_y)))) / (old_wt + new_wt) + sum_wt += new_wt + sum_wt2 += (new_wt * new_wt) + old_wt += new_wt + if not adjust: + sum_wt /= old_wt + sum_wt2 /= (old_wt * old_wt) + old_wt = 1. + elif is_observation: + mean_x = cur_x + mean_y = cur_y + + if nobs >= minp: + if not bias: + numerator = sum_wt * sum_wt + denominator = numerator - sum_wt2 + output[i] = ((numerator / denominator) * cov) if (denominator > 0.) 
else NaN + else: + output[i] = cov + else: + output[i] = NaN return output @@ -1180,7 +1281,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): mean_x += delta / nobs ssqdm_x += delta * (val - mean_x) - if nobs >= minp: + if (nobs >= minp) and (nobs > ddof): #pathological case if nobs == 1: val = 0 @@ -1224,7 +1325,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1): ssqdm_x = 0 # Variance is unchanged if no observation is added or removed - if nobs >= minp: + if (nobs >= minp) and (nobs > ddof): #pathological case if nobs == 1: val = 0 @@ -1285,17 +1386,14 @@ def roll_skew(ndarray[double_t] input, int win, int minp): xxx -= prev * prev * prev nobs -= 1 - if nobs >= minp: A = x / nobs B = xx / nobs - A * A C = xxx / nobs - A * A * A - 3 * A * B - - R = sqrt(B) - - if B == 0 or nobs < 3: + if B <= 0 or nobs < 3: output[i] = NaN else: + R = sqrt(B) output[i] = ((sqrt(nobs * (nobs - 1.)) * C) / ((nobs-2) * R * R * R)) else: diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index a2c7cc30e4798..49de02c23cc47 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -80,8 +80,8 @@ halflife : float, optional Specify decay in terms of halflife, :math:`\alpha = 1 - exp(log(0.5) / halflife)` min_periods : int, default 0 - Number of observations in sample to require (only affects - beginning) + Minimum number of observations in window required to have a value + (otherwise result is NA). freq : None or string alias / date offset object, default=None Frequency to conform to before computing statistic adjust : boolean, default True @@ -201,7 +201,8 @@ def rolling_count(arg, window, freq=None, center=False, how=None): of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
""" arg = _conv_timerule(arg, freq, how) - window = min(window, len(arg)) + if not center: + window = min(window, len(arg)) return_hook, values = _process_data_structure(arg, kill_inf=False) @@ -211,7 +212,6 @@ def rolling_count(arg, window, freq=None, center=False, how=None): # putmask here? result[np.isnan(result)] = 0 - return return_hook(result) @@ -462,50 +462,46 @@ def _get_center_of_mass(com, span, halflife): @Appender(_doc_template) def ewma(arg, com=None, span=None, halflife=None, min_periods=0, freq=None, adjust=True, how=None, ignore_na=False): - com = _get_center_of_mass(com, span, halflife) arg = _conv_timerule(arg, freq, how) + com = _get_center_of_mass(com, span, halflife) def _ewma(v): - result = algos.ewma(v, com, int(adjust), int(ignore_na)) - if min_periods > 1: - first_index = _first_valid_index(v) - result[first_index: first_index + min_periods - 1] = NaN - return result + return algos.ewma(v, com, int(adjust), int(ignore_na), int(min_periods)) return_hook, values = _process_data_structure(arg) - output = np.apply_along_axis(_ewma, 0, values) + if values.size == 0: + output = values.copy() + else: + output = np.apply_along_axis(_ewma, 0, values) return return_hook(output) -def _first_valid_index(arr): - # argmax scans from left - return notnull(arr).argmax() if len(arr) else 0 - - @Substitution("Exponentially-weighted moving variance", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmvar(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - freq=None, how=None, ignore_na=False): - com = _get_center_of_mass(com, span, halflife) + freq=None, how=None, ignore_na=False, adjust=True): arg = _conv_timerule(arg, freq, how) - moment2nd = ewma(arg * arg, com=com, min_periods=min_periods, ignore_na=ignore_na) - moment1st = ewma(arg, com=com, min_periods=min_periods, ignore_na=ignore_na) + com = _get_center_of_mass(com, span, halflife) - result = moment2nd - moment1st ** 2 - if not bias: - 
result *= (1.0 + 2.0 * com) / (2.0 * com) + def _ewmvar(v): + return algos.ewmcov(v, v, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) - return result + return_hook, values = _process_data_structure(arg) + if values.size == 0: + output = values.copy() + else: + output = np.apply_along_axis(_ewmvar, 0, values) + return return_hook(output) @Substitution("Exponentially-weighted moving std", _unary_arg, _ewm_kw+_bias_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, - ignore_na=False): + ignore_na=False, adjust=True): result = ewmvar(arg, com=com, span=span, halflife=halflife, - min_periods=min_periods, bias=bias, ignore_na=ignore_na) + min_periods=min_periods, bias=bias, adjust=adjust, ignore_na=ignore_na) return _zsqrt(result) ewmvol = ewmstd @@ -515,7 +511,7 @@ def ewmstd(arg, com=None, span=None, halflife=None, min_periods=0, bias=False, _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - bias=False, freq=None, pairwise=None, how=None, ignore_na=False): + bias=False, freq=None, pairwise=None, how=None, ignore_na=False, adjust=True): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -525,17 +521,17 @@ def ewmcov(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, pairwise = True if pairwise is None else pairwise arg1 = _conv_timerule(arg1, freq, how) arg2 = _conv_timerule(arg2, freq, how) + com = _get_center_of_mass(com, span, halflife) def _get_ewmcov(X, Y): - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods, - ignore_na=ignore_na) - return (mean(X * Y) - mean(X) * mean(Y)) + # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() + return_hook, x_values = _process_data_structure(X) + return_hook, y_values = _process_data_structure(Y) + 
cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), int(bias)) + return return_hook(cov) + result = _flex_binary_moment(arg1, arg2, _get_ewmcov, pairwise=bool(pairwise)) - if not bias: - com = _get_center_of_mass(com, span, halflife) - result *= (1.0 + 2.0 * com) / (2.0 * com) - return result @@ -543,7 +539,7 @@ def _get_ewmcov(X, Y): _ewm_kw+_pairwise_kw, _type_of_input_retval, _ewm_notes) @Appender(_doc_template) def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, - freq=None, pairwise=None, how=None, ignore_na=False): + freq=None, pairwise=None, how=None, ignore_na=False, adjust=True): if arg2 is None: arg2 = arg1 pairwise = True if pairwise is None else pairwise @@ -553,13 +549,18 @@ def ewmcorr(arg1, arg2=None, com=None, span=None, halflife=None, min_periods=0, pairwise = True if pairwise is None else pairwise arg1 = _conv_timerule(arg1, freq, how) arg2 = _conv_timerule(arg2, freq, how) + com = _get_center_of_mass(com, span, halflife) def _get_ewmcorr(X, Y): - mean = lambda x: ewma(x, com=com, span=span, halflife=halflife, min_periods=min_periods, - ignore_na=ignore_na) - var = lambda x: ewmvar(x, com=com, span=span, halflife=halflife, min_periods=min_periods, - bias=True, ignore_na=ignore_na) - return (mean(X * Y) - mean(X) * mean(Y)) / _zsqrt(var(X) * var(Y)) + # X and Y have the same structure (and NaNs) when called from _flex_binary_moment() + return_hook, x_values = _process_data_structure(X) + return_hook, y_values = _process_data_structure(Y) + cov = algos.ewmcov(x_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) + x_var = algos.ewmcov(x_values, x_values, com, int(adjust), int(ignore_na), int(min_periods), 1) + y_var = algos.ewmcov(y_values, y_values, com, int(adjust), int(ignore_na), int(min_periods), 1) + corr = cov / _zsqrt(x_var * y_var) + return return_hook(corr) + result = _flex_binary_moment(arg1, arg2, _get_ewmcorr, pairwise=bool(pairwise)) return result 
@@ -886,9 +887,9 @@ def call_cython(arg, window, minp, args=(), kwargs={}, **kwds): expanding_std = _expanding_func(_ts_std, 'Unbiased expanding standard deviation.', - check_minp=_require_min_periods(2)) + check_minp=_require_min_periods(1)) expanding_var = _expanding_func(algos.roll_var, 'Unbiased expanding variance.', - check_minp=_require_min_periods(2)) + check_minp=_require_min_periods(1)) expanding_skew = _expanding_func( algos.roll_skew, 'Unbiased expanding skewness.', check_minp=_require_min_periods(3)) @@ -961,7 +962,7 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = len(arg1) + len(arg2) + window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) return rolling_cov(arg1, arg2, window, min_periods=min_periods, freq=freq, pairwise=pairwise) @@ -978,7 +979,7 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, pairwise=None): min_periods = arg2 arg2 = arg1 pairwise = True if pairwise is None else pairwise - window = len(arg1) + len(arg2) + window = max((len(arg1) + len(arg2)), min_periods) if min_periods else (len(arg1) + len(arg2)) return rolling_corr(arg1, arg2, window, min_periods=min_periods, freq=freq, pairwise=pairwise) @@ -1025,6 +1026,6 @@ def expanding_apply(arg, func, min_periods=1, freq=None, frequency by resampling the data. This is done with the default parameters of :meth:`~pandas.Series.resample` (i.e. using the `mean`). 
""" - window = len(arg) + window = max(len(arg), min_periods) if min_periods else len(arg) return rolling_apply(arg, window, func, min_periods=min_periods, freq=freq, args=args, kwargs=kwargs) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 2c2a19660f266..1d0be4ce48f4f 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -270,8 +270,12 @@ def test_rolling_std(self): def test_rolling_std_1obs(self): result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), 1, min_periods=1) - expected = np.zeros(5) + expected = np.array([np.nan] * 5) + assert_almost_equal(result, expected) + result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]), + 1, min_periods=1, ddof=0) + expected = np.zeros(5) assert_almost_equal(result, expected) result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]), @@ -642,10 +646,9 @@ def _check_ew_ndarray(self, func, preserve_nan=False): self.assertTrue(np.isnan(result.values[:10]).all()) self.assertFalse(np.isnan(result.values[10:]).any()) else: - # ewmstd, ewmvol, ewmvar *should* require at least two values, - # but currently require only one, for some reason - self.assertTrue(np.isnan(result.values[:10]).all()) - self.assertFalse(np.isnan(result.values[10:]).any()) + # ewmstd, ewmvol, ewmvar (with bias=False) require at least two values + self.assertTrue(np.isnan(result.values[:11]).all()) + self.assertFalse(np.isnan(result.values[11:]).any()) # check series of length 0 result = func(Series([]), 50, min_periods=min_periods) @@ -656,9 +659,8 @@ def _check_ew_ndarray(self, func, preserve_nan=False): if func == mom.ewma: assert_series_equal(result, Series([1.])) else: - # ewmstd, ewmvol, ewmvar *should* require at least two values, - # so should return NaN, but currently require one, so return 0. 
- assert_series_equal(result, Series([0.])) + # ewmstd, ewmvol, ewmvar with bias=False require at least two values + assert_series_equal(result, Series([np.NaN])) # pass in ints result2 = func(np.arange(50), span=10) @@ -670,6 +672,342 @@ def _check_ew_structures(self, func): frame_result = func(self.frame, com=10) self.assertEqual(type(frame_result), DataFrame) + def _test_series(self): + return [Series(), + Series([np.nan]), + Series([np.nan, np.nan]), + Series([3.]), + Series([np.nan, 3.]), + Series([3., np.nan]), + Series([1., 3.]), + Series([2., 2.]), + Series([3., 1.]), + Series([5., 5., 5., 5., np.nan, np.nan, np.nan, 5., 5., np.nan, np.nan]), + Series([np.nan, 5., 5., 5., np.nan, np.nan, np.nan, 5., 5., np.nan, np.nan]), + Series([np.nan, np.nan, 5., 5., np.nan, np.nan, np.nan, 5., 5., np.nan, np.nan]), + Series([np.nan, 3., np.nan, 3., 4., 5., 6., np.nan, np.nan, 7., 12., 13., 14., 15.]), + Series([np.nan, 5., np.nan, 2., 4., 0., 9., np.nan, np.nan, 3., 12., 13., 14., 15.]), + Series([2., 3., np.nan, 3., 4., 5., 6., np.nan, np.nan, 7., 12., 13., 14., 15.]), + Series([2., 5., np.nan, 2., 4., 0., 9., np.nan, np.nan, 3., 12., 13., 14., 15.]), + Series(range(10)), + Series(range(20, 0, -2)), + ] + + def _test_dataframes(self): + return [DataFrame(), + DataFrame(columns=['a']), + DataFrame(columns=['a', 'a']), + DataFrame(columns=['a', 'b']), + DataFrame(np.arange(10).reshape((5, 2))), + DataFrame(np.arange(25).reshape((5, 5))), + DataFrame(np.arange(25).reshape((5, 5)), columns=['a', 'b', 99, 'd', 'd']), + ] + [DataFrame(s) for s in self._test_series()] + + def _test_data(self): + return self._test_series() + self._test_dataframes() + + def _test_moments_consistency(self, + min_periods, + count, mean, mock_mean, corr, + var_unbiased=None, std_unbiased=None, cov_unbiased=None, + var_biased=None, std_biased=None, cov_biased=None, + var_debiasing_factors=None): + + def _non_null_values(x): + return set([v for v in x.values.reshape(x.values.size) if notnull(v)]) + 
+ for x in self._test_data(): + assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + is_constant = (len(_non_null_values(x)) == 1) + count_x = count(x) + mean_x = mean(x) + + if mock_mean: + # check that mean equals mock_mean + expected = mock_mean(x) + assert_equal(mean_x, expected) + + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = corr(x, x) + # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # restore once rolling_cov(x, x) is identically equal to var(x) + + if is_constant: + # check mean of constant series + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = x.max().max() + assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + assert_equal(corr_x_x, expected) + + if var_unbiased and var_biased and var_debiasing_factors: + # check variance debiasing factors + var_unbiased_x = var_unbiased(x) + var_biased_x = var_biased(x) + var_debiasing_factors_x = var_debiasing_factors(x) + assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + + for (std, var, cov) in [(std_biased, var_biased, cov_biased), + (std_unbiased, var_unbiased, cov_unbiased)]: + + # check that var(x), std(x), and cov(x) are all >= 0 + var_x = var(x) + std_x = std(x) + self.assertFalse((var_x < 0).any().any()) + self.assertFalse((std_x < 0).any().any()) + if cov: + cov_x_x = cov(x, x) + self.assertFalse((cov_x_x < 0).any().any()) + + # check that var(x) == cov(x, x) + assert_equal(var_x, cov_x_x) + + # check that var(x) == std(x)^2 + assert_equal(var_x, std_x * std_x) + + if var is var_biased: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = mean(x * x) + assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + if is_constant: + # check that variance of constant series is identically 0 + self.assertFalse((var_x > 0).any().any()) + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = 0. 
+ if var is var_unbiased: + expected[count_x < 2] = np.nan + assert_equal(var_x, expected) + + if isinstance(x, Series): + for y in self._test_data(): + if not x.isnull().equals(y.isnull()): + # can only easily test two Series with similar structure + continue + + # check that cor(x, y) is symmetric + corr_x_y = corr(x, y) + corr_y_x = corr(y, x) + assert_equal(corr_x_y, corr_y_x) + + if cov: + # check that cov(x, y) is symmetric + cov_x_y = cov(x, y) + cov_y_x = cov(y, x) + assert_equal(cov_x_y, cov_y_x) + + # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2 + var_x_plus_y = var(x + y) + var_y = var(y) + assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + + # check that corr(x, y) == cov(x, y) / (std(x) * std(y)) + std_y = std(y) + assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if cov is cov_biased: + # check that biased cov(x, y) == mean(x*y) - mean(x)*mean(y) + mean_y = mean(y) + mean_x_times_y = mean(x * y) + assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + + def test_ewm_consistency(self): + + def _weights(s, com, adjust, ignore_na): + if isinstance(s, DataFrame): + w = DataFrame(index=s.index, columns=s.columns) + for i, _ in enumerate(s.columns): + w.iloc[:, i] = _weights(s.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na) + return w + + w = Series(np.nan, index=s.index) + alpha = 1. / (1. + com) + if ignore_na: + w[s.notnull()] = _weights(s[s.notnull()], com=com, adjust=adjust, ignore_na=False) + elif adjust: + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + w.iat[i] = pow(1. / (1. - alpha), i) + else: + sum_wts = 0. + prev_i = -1 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + if prev_i == -1: + w.iat[i] = 1. + else: + w.iat[i] = alpha * sum_wts / pow(1. 
- alpha, i - prev_i) + sum_wts += w.iat[i] + prev_i = i + return w + + def _variance_debiasing_factors(s, com, adjust, ignore_na): + weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) + cum_sum = weights.cumsum().fillna(method='ffill') + cum_sum_sq = (weights * weights).cumsum().fillna(method='ffill') + numerator = cum_sum * cum_sum + denominator = numerator - cum_sum_sq + denominator[denominator <= 0.] = np.nan + return numerator / denominator + + def _ewma(s, com, min_periods, adjust, ignore_na): + weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) + result = s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method='ffill') + result[mom.expanding_count(s) < (max(min_periods, 1) if min_periods else 1)] = np.nan + return result + + com = 3. + for min_periods in [0, 1, 2, 3, 4]: + for adjust in [True, False]: + for ignore_na in [False, True]: + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=mom.expanding_count, + mean=lambda x: mom.ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), + mock_mean=lambda x: _ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), + corr=lambda x, y: mom.ewmcorr(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), + var_unbiased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), + std_unbiased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), + cov_unbiased=lambda x, y: mom.ewmcov(x, y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=False), + var_biased=lambda x: mom.ewmvar(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), + std_biased=lambda x: mom.ewmstd(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), + cov_biased=lambda x, y: mom.ewmcov(x, 
y, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, bias=True), + var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na)) + + def test_expanding_consistency(self): + for min_periods in [0, 1, 2, 3, 4]: + + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=mom.expanding_count, + mean=lambda x: mom.expanding_mean(x, min_periods=min_periods), + mock_mean=lambda x: mom.expanding_sum(x, min_periods=min_periods) / mom.expanding_count(x), + corr=lambda x, y: mom.expanding_corr(x, y, min_periods=min_periods), + var_unbiased=lambda x: mom.expanding_var(x, min_periods=min_periods), + std_unbiased=lambda x: mom.expanding_std(x, min_periods=min_periods), + cov_unbiased=lambda x, y: mom.expanding_cov(x, y, min_periods=min_periods), + var_biased=lambda x: mom.expanding_var(x, min_periods=min_periods, ddof=0), + std_biased=lambda x: mom.expanding_std(x, min_periods=min_periods, ddof=0), + cov_biased=None, + var_debiasing_factors=lambda x: mom.expanding_count(x) / (mom.expanding_count(x) - 1.).replace(0., np.nan) + ) + + # test consistency between expanding_xyz() and expanding_apply of Series/DataFrame.xyz() + for x in self._test_data(): + assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + for (expanding_f, f, require_min_periods) in [ + (mom.expanding_count, lambda v: Series(v).count(), None), + (mom.expanding_max, lambda v: Series(v).max(), None), + (mom.expanding_min, lambda v: Series(v).min(), None), + (mom.expanding_sum, lambda v: Series(v).sum(), None), + (mom.expanding_mean, lambda v: Series(v).mean(), None), + (mom.expanding_std, lambda v: Series(v).std(), 1), + (mom.expanding_cov, lambda v: Series(v).cov(Series(v)), None), + (mom.expanding_corr, lambda v: Series(v).corr(Series(v)), None), + (mom.expanding_var, lambda v: Series(v).var(), 1), + #(mom.expanding_skew, lambda v: 
Series(v).skew(), 3), # restore once GH 8086 is fixed + #(mom.expanding_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed + #(lambda x, min_periods: mom.expanding_quantile(x, 0.3, min_periods=min_periods), + # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed + (mom.expanding_median, lambda v: Series(v).median(), None), + ]: + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): + continue + + if expanding_f is mom.expanding_count: + expanding_f_result = expanding_f(x) + expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=0) + else: + if expanding_f in [mom.expanding_cov, mom.expanding_corr]: + expanding_f_result = expanding_f(x, min_periods=min_periods, pairwise=False) + else: + expanding_f_result = expanding_f(x, min_periods=min_periods) + expanding_apply_f_result = mom.expanding_apply(x, func=f, min_periods=min_periods) + assert_equal(expanding_f_result, expanding_apply_f_result) + + if (expanding_f in [mom.expanding_cov, mom.expanding_corr]) and isinstance(x, DataFrame): + # test pairwise=True + expanding_f_result = expanding_f(x, x, min_periods=min_periods, pairwise=True) + expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) + for i, _ in enumerate(x.columns): + for j, _ in enumerate(x.columns): + expected.iloc[:, i, j] = expanding_f(x.iloc[:, i], x.iloc[:, j], min_periods=min_periods) + assert_panel_equal(expanding_f_result, expected) + + def test_rolling_consistency(self): + for window in [1, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: mom.rolling_count(x, window=window, center=center), + mean=lambda x: mom.rolling_mean(x, window=window, min_periods=min_periods, center=center), + 
mock_mean=lambda x: mom.rolling_sum(x, window=window, min_periods=min_periods, center=center).divide( + mom.rolling_count(x, window=window, center=center)), + corr=lambda x, y: mom.rolling_corr(x, y, window=window, min_periods=min_periods, center=center), + var_unbiased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center), + std_unbiased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center), + cov_unbiased=lambda x, y: mom.rolling_cov(x, y, window=window, min_periods=min_periods, center=center), + var_biased=lambda x: mom.rolling_var(x, window=window, min_periods=min_periods, center=center, ddof=0), + std_biased=lambda x: mom.rolling_std(x, window=window, min_periods=min_periods, center=center, ddof=0), + cov_biased=None, + var_debiasing_factors=lambda x: mom.rolling_count(x, window=window, center=center).divide( + (mom.rolling_count(x, window=window, center=center) - 1.).replace(0., np.nan)), + ) + + # test consistency between rolling_xyz and rolling_apply of Series/DataFrame.xyz + for x in self._test_data(): + assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + for (rolling_f, f, require_min_periods) in [ + (mom.rolling_count, lambda v: Series(v).count(), None), + (mom.rolling_max, lambda v: Series(v).max(), None), + (mom.rolling_min, lambda v: Series(v).min(), None), + (mom.rolling_sum, lambda v: Series(v).sum(), None), + (mom.rolling_mean, lambda v: Series(v).mean(), None), + (mom.rolling_std, lambda v: Series(v).std(), 1), + (mom.rolling_cov, lambda v: Series(v).cov(Series(v)), None), + (mom.rolling_corr, lambda v: Series(v).corr(Series(v)), None), + (mom.rolling_var, lambda v: Series(v).var(), 1), + #(mom.rolling_skew, lambda v: Series(v).skew(), 3), # restore once GH 8086 is fixed + # (mom.rolling_kurt, lambda v: Series(v).kurt(), 4), # restore once GH 8086 is fixed + #(lambda x, window, min_periods, center: mom.rolling_quantile(x, window, 0.3, 
min_periods=min_periods, center=center), + # lambda v: Series(v).quantile(0.3), None), # restore once GH 8084 is fixed + (mom.rolling_median, lambda v: Series(v).median(), None), + ]: + if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): + continue + + if rolling_f is mom.rolling_count: + rolling_f_result = rolling_f(x, window=window, center=center) + rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, + min_periods=0, center=center) + else: + if rolling_f in [mom.rolling_cov, mom.rolling_corr]: + rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center, pairwise=False) + else: + rolling_f_result = rolling_f(x, window=window, min_periods=min_periods, center=center) + rolling_apply_f_result = mom.rolling_apply(x, window=window, func=f, + min_periods=min_periods, center=center) + assert_equal(rolling_f_result, rolling_apply_f_result) + + if (rolling_f in [mom.rolling_cov, mom.rolling_corr]) and isinstance(x, DataFrame): + # test pairwise=True + rolling_f_result = rolling_f(x, x, window=window, min_periods=min_periods, + center=center, pairwise=True) + expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) + for i, _ in enumerate(x.columns): + for j, _ in enumerate(x.columns): + expected.iloc[:, i, j] = rolling_f(x.iloc[:, i], x.iloc[:, j], + window=window, min_periods=min_periods, center=center) + assert_panel_equal(rolling_f_result, expected) + # binary moments def test_rolling_cov(self): A = self.series @@ -786,14 +1124,9 @@ def _check_binary_ew(self, func): # GH 7898 for min_periods in (0, 1, 2): result = func(A, B, 20, min_periods=min_periods) - # binary functions (ewmcov, ewmcorr) *should* require at least two values - if (func == mom.ewmcov) and (min_periods <= 1): - # currenty ewmcov requires only one value, for some reason. 
- self.assertTrue(np.isnan(result.values[:10]).all()) - self.assertFalse(np.isnan(result.values[10:]).any()) - else: - self.assertTrue(np.isnan(result.values[:11]).all()) - self.assertFalse(np.isnan(result.values[11:]).any()) + # binary functions (ewmcov, ewmcorr) with bias=False require at least two values + self.assertTrue(np.isnan(result.values[:11]).all()) + self.assertFalse(np.isnan(result.values[11:]).any()) # check series of length 0 result = func(Series([]), Series([]), 50, min_periods=min_periods) @@ -801,11 +1134,7 @@ def _check_binary_ew(self, func): # check series of length 1 result = func(Series([1.]), Series([1.]), 50, min_periods=min_periods) - if (func == mom.ewmcov) and (min_periods <= 1): - # currenty ewmcov requires only one value, for some reason. - assert_series_equal(result, Series([0.])) - else: - assert_series_equal(result, Series([np.NaN])) + assert_series_equal(result, Series([np.NaN])) self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)