Skip to content

Commit 773b9fb

Browse files
authored
ENH: Implement sem for Rolling and Expanding (#37043)
1 parent 40d0243 commit 773b9fb

File tree

8 files changed

+108
-0
lines changed

8 files changed

+108
-0
lines changed

doc/source/reference/window.rst

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Standard moving window functions
3232
Rolling.apply
3333
Rolling.aggregate
3434
Rolling.quantile
35+
Rolling.sem
3536
Window.mean
3637
Window.sum
3738
Window.var
@@ -61,6 +62,7 @@ Standard expanding window functions
6162
Expanding.apply
6263
Expanding.aggregate
6364
Expanding.quantile
65+
Expanding.sem
6466

6567
Exponentially-weighted moving window functions
6668
----------------------------------------------

doc/source/user_guide/computation.rst

+2
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ We provide a number of common statistical functions:
328328
:meth:`~Rolling.apply`, Generic apply
329329
:meth:`~Rolling.cov`, Sample covariance (binary)
330330
:meth:`~Rolling.corr`, Sample correlation (binary)
331+
:meth:`~Rolling.sem`, Standard error of the mean
331332

332333
.. _computation.window_variance.caveats:
333334

@@ -938,6 +939,7 @@ Method summary
938939
:meth:`~Expanding.apply`, Generic apply
939940
:meth:`~Expanding.cov`, Sample covariance (binary)
940941
:meth:`~Expanding.corr`, Sample correlation (binary)
942+
:meth:`~Expanding.sem`, Standard error of the mean
941943

942944
.. note::
943945

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ Other enhancements
191191
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
192192
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
193193
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
194+
- Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of the mean (:issue:`26476`)
194195

195196
.. _whatsnew_120.api_breaking.python:
196197

pandas/core/window/expanding.py

+5
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,11 @@ def var(self, ddof=1, *args, **kwargs):
192192
nv.validate_expanding_func("var", args, kwargs)
193193
return super().var(ddof=ddof, **kwargs)
194194

195+
@Substitution(name="expanding")
@Appender(_shared_docs["sem"])
def sem(self, ddof=1, *args, **kwargs):
    # The user-facing documentation is attached by the decorators above from
    # ``_shared_docs["sem"]`` with ``name`` set to "expanding", so no inline
    # docstring is written here.
    # ``*args`` is accepted by the signature but is not forwarded: only
    # ``ddof`` and the keyword arguments reach the parent implementation.
    return super().sem(ddof=ddof, **kwargs)
199+
195200
@Substitution(name="expanding", func_name="skew")
196201
@Appender(_doc_template)
197202
@Appender(_shared_docs["skew"])

pandas/core/window/rolling.py

+58
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,59 @@ def skew(self, **kwargs):
15731573
"""
15741574
)
15751575

1576+
def sem(self, ddof=1, *args, **kwargs):
    """
    Compute the windowed standard error of mean.

    The result is ``self.std(*args, **kwargs)`` divided element-wise by the
    square root of ``self.count() - ddof``.

    Parameters
    ----------
    ddof : int, default 1
        Subtracted from the per-window count before the square root is
        taken. It is not passed on to ``std``.
    *args, **kwargs
        Forwarded unchanged to ``self.std``.

    Returns
    -------
    Whatever ``self.std(...) / (self.count() - ddof).pow(0.5)`` evaluates
    to for the calling object.
    """
    # NOTE(review): ``ddof`` only adjusts the count term below; ``std`` runs
    # with its own default unless the caller supplies one through
    # ``*args``/``**kwargs``. Confirm this is the intended definition.
    spread = self.std(*args, **kwargs)
    adjusted_count = self.count() - ddof
    return spread / adjusted_count.pow(0.5)
1578+
1579+
# Shared docstring template for ``sem``. The ``%(name)s`` placeholders are
# filled in by the ``Substitution`` decorator on each ``sem`` method that
# appends this text (``name="rolling"`` / ``name="expanding"``).
_shared_docs["sem"] = dedent(
    """
Compute %(name)s standard error of mean.

Parameters
----------

ddof : int, default 1
    Delta Degrees of Freedom. The divisor used in calculations
    is ``N - ddof``, where ``N`` represents the number of elements.

*args, **kwargs
    For NumPy compatibility. No additional arguments are used.

Returns
-------
Series or DataFrame
    Returned object type is determined by the caller of the %(name)s
    calculation.

See Also
--------
pandas.Series.%(name)s : Calling object with Series data.
pandas.DataFrame.%(name)s : Calling object with DataFrames.
pandas.Series.sem : Equivalent method for Series.
pandas.DataFrame.sem : Equivalent method for DataFrame.

Notes
-----
A minimum of one period is required for the calculation.

Examples
--------
>>> s = pd.Series([0, 1, 2, 3])
>>> s.rolling(2, min_periods=1).sem()
0         NaN
1    0.707107
2    0.707107
3    0.707107
dtype: float64

>>> s.expanding().sem()
0         NaN
1    0.707107
2    0.707107
3    0.745356
dtype: float64
"""
)
1628+
15761629
def kurt(self, **kwargs):
15771630
window_func = self._get_roll_func("roll_kurt")
15781631
kwargs.pop("require_min_periods", None)
@@ -2081,6 +2134,11 @@ def var(self, ddof=1, *args, **kwargs):
20812134
def skew(self, **kwargs):
20822135
return super().skew(**kwargs)
20832136

2137+
@Substitution(name="rolling")
@Appender(_shared_docs["sem"])
def sem(self, ddof=1, *args, **kwargs):
    # The user-facing documentation is attached by the decorators above from
    # ``_shared_docs["sem"]`` with ``name`` set to "rolling", so no inline
    # docstring is written here.
    # ``ddof`` is only subtracted from the window count; it is not passed to
    # ``std``, which receives just ``*args``/``**kwargs``.
    # NOTE(review): this repeats the expression used by the other ``sem``
    # definition in this module instead of delegating via ``super()`` the way
    # ``skew`` does -- confirm the class hierarchy before deduplicating.
    spread = self.std(*args, **kwargs)
    adjusted_count = self.count() - ddof
    return spread / adjusted_count.pow(0.5)
2141+
20842142
_agg_doc = dedent(
20852143
"""
20862144
Examples

pandas/tests/window/test_expanding.py

+11
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,14 @@ def test_center_deprecate_warning():
236236

237237
with tm.assert_produces_warning(None):
238238
df.expanding()
239+
240+
241+
@pytest.mark.parametrize("constructor", ["DataFrame", "Series"])
def test_expanding_sem(constructor):
    """Check ``expanding().sem()`` on ``[0, 1, 2]`` for Series and DataFrame."""
    # GH: 26476
    obj = getattr(pd, constructor)([0, 1, 2])
    result = obj.expanding().sem()
    if isinstance(result, DataFrame):
        # Collapse the single-column frame to a plain Series so both
        # constructors are compared against the same expected values.
        result = pd.Series(result[0].values)
    expected = pd.Series([np.nan] + [0.707107] * 2)
    tm.assert_series_equal(result, expected)

pandas/tests/window/test_grouper.py

+18
Original file line numberDiff line numberDiff line change
@@ -531,3 +531,21 @@ def test_groupby_rolling_count_closed_on(self):
531531
),
532532
)
533533
tm.assert_series_equal(result, expected)
534+
535+
@pytest.mark.parametrize(
    ("func", "kwargs"),
    [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})],
)
def test_groupby_rolling_sem(self, func, kwargs):
    """Check ``sem`` on grouped rolling and grouped expanding windows."""
    # GH: 26476
    df = pd.DataFrame(
        [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"]
    )
    # ``func`` names the window constructor to call on the groupby object;
    # both parametrizations are compared against the same expected frame.
    result = getattr(df.groupby("a"), func)(**kwargs).sem()
    expected = pd.DataFrame(
        {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]},
        index=pd.MultiIndex.from_tuples(
            [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
        ),
    )
    tm.assert_frame_equal(result, expected)

pandas/tests/window/test_rolling.py

+11
Original file line numberDiff line numberDiff line change
@@ -868,3 +868,14 @@ def test_rolling_period_index(index, window, func, values):
868868
result = getattr(ds.rolling(window, closed="left"), func)()
869869
expected = pd.Series(values, index=index)
870870
tm.assert_series_equal(result, expected)
871+
872+
873+
@pytest.mark.parametrize("constructor", ["DataFrame", "Series"])
def test_rolling_sem(constructor):
    """Check ``rolling(2, min_periods=1).sem()`` on ``[0, 1, 2]``."""
    # GH: 26476
    obj = getattr(pd, constructor)([0, 1, 2])
    result = obj.rolling(2, min_periods=1).sem()
    if isinstance(result, DataFrame):
        # Collapse the single-column frame to a plain Series so both
        # constructors are compared against the same expected values.
        result = pd.Series(result[0].values)
    expected = pd.Series([np.nan] + [0.707107] * 2)
    tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)