From 742e71329f1b685d82907078fc8e461bd9c44296 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Oct 2020 00:37:33 +0200 Subject: [PATCH 1/3] ENH: Implement sem for Rolling and Expanding --- doc/source/reference/window.rst | 2 + doc/source/user_guide/computation.rst | 2 + doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/window/rolling.py | 56 +++++++++++++++++++++++++++ pandas/tests/window/test_expanding.py | 11 ++++++ pandas/tests/window/test_rolling.py | 11 ++++++ 6 files changed, 83 insertions(+) diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 611c0e0f7f160..77697b966df18 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -32,6 +32,7 @@ Standard moving window functions Rolling.apply Rolling.aggregate Rolling.quantile + Rolling.sem Window.mean Window.sum Window.var @@ -61,6 +62,7 @@ Standard expanding window functions Expanding.apply Expanding.aggregate Expanding.quantile + Expanding.sem Exponentially-weighted moving window functions ---------------------------------------------- diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 75fb3380821d8..b24020848b363 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -328,6 +328,7 @@ We provide a number of common statistical functions: :meth:`~Rolling.apply`, Generic apply :meth:`~Rolling.cov`, Sample covariance (binary) :meth:`~Rolling.corr`, Sample correlation (binary) + :meth:`~Rolling.sem`, Standard error of mean .. _computation.window_variance.caveats: @@ -938,6 +939,7 @@ Method summary :meth:`~Expanding.apply`, Generic apply :meth:`~Expanding.cov`, Sample covariance (binary) :meth:`~Expanding.corr`, Sample correlation (binary) + :meth:`~Expanding.sem`, Standard error of mean .. note:: diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2f462b16ddf78..1022d16c1df30 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -190,6 +190,7 @@ Other enhancements - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) +- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`). .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9e829ef774d42..2b0d0a34cf5dd 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1573,6 +1573,57 @@ def skew(self, **kwargs): """ ) + def sem(self, ddof=1, *args, **kwargs): + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + + _shared_docs["sem"] = dedent( + """ + Compute %(name)s standard error of mean. + + Parameters + ---------- + + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + + *args, **kwargs + For NumPy compatibility. No additional arguments are used. + + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + pandas.Series.%(name)s : Calling object with Series data. + pandas.DataFrame.%(name)s : Calling object with DataFrames. + pandas.Series.sem : Equivalent method for Series. + pandas.DataFrame.sem : Equivalent method for DataFrame. + + Notes + ----- + A minimum of one period is required for the rolling calculation. + + Examples + -------- + >>> s = pd.Series([0, 1, 2, 3]) + >>> s.rolling(2, min_periods=1).sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.707107 + dtype: float64 + + >>> s.expanding().sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.707107 + dtype: float64 + """ + ) + def kurt(self, **kwargs): window_func = self._get_roll_func("roll_kurt") kwargs.pop("require_min_periods", None) @@ -2081,6 +2132,11 @@ def var(self, ddof=1, *args, **kwargs): def skew(self, **kwargs): return super().skew(**kwargs) + @Substitution(name="rolling") + @Appender(_shared_docs["sem"]) + def sem(self, ddof=1, *args, **kwargs): + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + _agg_doc = dedent( """ Examples diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index e5006fd391f90..b06a506281047 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -236,3 +236,14 @@ def test_center_deprecate_warning(): with tm.assert_produces_warning(None): df.expanding() + + +@pytest.mark.parametrize("constructor", ["DataFrame", "Series"]) +def test_expanding_sem(constructor): + # GH: 26476 + obj = getattr(pd, constructor)([0, 1, 2]) + result = obj.expanding().sem() + if isinstance(result, DataFrame): + result = pd.Series(result[0].values) + expected = pd.Series([np.nan] + [0.707107] * 2) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 73831d518032d..a5fbc3c94786c 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -868,3 +868,14 @@ def test_rolling_period_index(index, window, func, values): result = getattr(ds.rolling(window, closed="left"), func)() expected = pd.Series(values, index=index) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("constructor", ["DataFrame", "Series"]) +def test_rolling_sem(constructor): + # GH: 26476 + obj = getattr(pd, constructor)([0, 1, 2]) + result = obj.rolling(2, min_periods=1).sem() + if isinstance(result, DataFrame): + result = pd.Series(result[0].values) + expected = pd.Series([np.nan] + [0.707107] * 2) + tm.assert_series_equal(result, expected) From dde3ace7d8fb5da728f2121316da58fd39dd3de6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Oct 2020 21:43:26 +0200 Subject: [PATCH 2/3] Add test and fix docs --- pandas/core/window/expanding.py | 5 +++++ pandas/core/window/rolling.py | 2 ++ pandas/tests/window/test_grouper.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 319944fd48eae..c24c5d5702764 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -192,6 +192,11 @@ def var(self, ddof=1, *args, **kwargs): nv.validate_expanding_func("var", args, kwargs) return super().var(ddof=ddof, **kwargs) + @Substitution(name="expanding") + @Appender(_shared_docs["sem"]) + def sem(self, ddof=1, *args, **kwargs): + return super().sem(ddof=ddof, **kwargs) + @Substitution(name="expanding", func_name="skew") @Appender(_doc_template) @Appender(_shared_docs["skew"]) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2b0d0a34cf5dd..f955587407a54 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1590,6 +1590,8 @@ def sem(self, ddof=1, *args, **kwargs): *args, **kwargs For NumPy compatibility. No additional arguments are used. + Returns + ------- Series or DataFrame Returned object type is determined by the caller of the %(name)s calculation. diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 6b80f65c16fa6..034f941462bb5 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -531,3 +531,21 @@ def test_groupby_rolling_count_closed_on(self): ), ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + ("func", "kwargs"), + [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})], + ) + def test_groupby_rolling_sem(self, func, kwargs): + # GH: 26476 + df = pd.DataFrame( + [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] + ) + result = getattr(df.groupby("a"), func)(**kwargs).sem() + expected = pd.DataFrame( + {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, + index=pd.MultiIndex.from_tuples( + [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] + ), + ) + tm.assert_frame_equal(result, expected) From 8c9782a50bffe1d6b24eca3530c6feb593727426 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 11 Oct 2020 22:17:53 +0200 Subject: [PATCH 3/3] Fix docstring --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f955587407a54..5398c14c8774a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1621,7 +1621,7 @@ def sem(self, ddof=1, *args, **kwargs): 0 NaN 1 0.707107 2 0.707107 - 3 0.707107 + 3 0.745356 dtype: float64 """ )