From 3dd89877f0beb29788ce81f6f349214157b8384d Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Fri, 28 Jun 2024 13:30:42 -0500 Subject: [PATCH 1/7] add adjust parameter to the ewma variable times test. Add tests for disallowed decay-specification parameters when times is specified and adjust=False --- pandas/tests/window/test_ewm.py | 38 +++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 35c896dc0090b..8023a7fa99966 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -102,7 +102,8 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): tm.assert_frame_equal(result, expected) -def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit): +def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit, adjust): + #GH 54328 tz = tz_aware_fixture halflife = "23 days" times = ( @@ -112,8 +113,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit): ) data = np.arange(3) df = DataFrame(data) - result = df.ewm(halflife=halflife, times=times).mean() - expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459]) + result = df.ewm(halflife=halflife, times=times, adjust=adjust).mean() + if adjust: + expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459]) + else: + expected = DataFrame([0.0, 0.23762518642226227, 1.534926369128742]) tm.assert_frame_equal(result, expected) @@ -148,13 +152,33 @@ def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na): assert result == expected -def test_ewma_times_adjust_false_raises(): - # GH 40098 +def test_ewma_times_adjust_false_with_disallowed_com(): + # GH 54328 + with pytest.raises( + NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' + ): + Series(range(1)).ewm( + 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), halflife='1D' + ) + + +def test_ewma_times_adjust_false_with_disallowed_alpha(): + # GH 54328 + with pytest.raises( + NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' + ): + Series(range(1)).ewm( + 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), alpha=0.5, halflife='1D' + ) + + +def test_ewma_times_adjust_false_with_disallowed_span(): + # GH 54328 with pytest.raises( - NotImplementedError, match="times is not supported with adjust=False." + NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' ): Series(range(1)).ewm( - 0.1, adjust=False, times=date_range("2000", freq="D", periods=1) + 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), span=10, halflife='1D' ) From 5992c3e08b82ad4aeefbc27883f81c63eb0ce2ff Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Fri, 28 Jun 2024 13:31:15 -0500 Subject: [PATCH 2/7] allow adjust=False when times is provided --- pandas/core/window/ewm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index b2855ff1f4048..06b474982f30e 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -134,8 +134,10 @@ class ExponentialMovingWindow(BaseWindow): Provide exponentially weighted (EW) calculations. Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be - provided if ``times`` is not provided. If ``times`` is provided, + provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``, ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only + provided decay-specification parameter. Parameters ---------- @@ -358,8 +360,6 @@ def __init__( self.ignore_na = ignore_na self.times = times if self.times is not None: - if not self.adjust: - raise NotImplementedError("times is not supported with adjust=False.") times_dtype = getattr(self.times, "dtype", None) if not ( is_datetime64_dtype(times_dtype) @@ -376,6 +376,9 @@ def __init__( # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(self.com, self.span, self.alpha) > 0: + if not self.adjust: + raise NotImplementedError('None of com, span, or alpha can be specified if ' + 'times is provided and adjust=False') self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: self._com = 1.0 From fa1447aa40a0b31d6bbf50ff87f80679cd908b68 Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Fri, 28 Jun 2024 13:33:07 -0500 Subject: [PATCH 3/7] re-calculate alpha each iteration for irregular-spaced time series --- pandas/_libs/window/aggregations.pyx | 3 +++ pandas/core/window/numba_.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 6365c030b695b..139c2cf18d5f0 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1813,6 +1813,9 @@ def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, if normalize: # avoid numerical errors on constant series if weighted != cur: + if not adjust and com == 1: + # update alpha "on the fly" for irregular-interval time series + new_wt = 1. - old_wt weighted = old_wt * weighted + new_wt * cur weighted /= (old_wt + new_wt) if adjust: diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 621b0f2c0f2d8..4a20ef9c4468b 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -149,6 +149,9 @@ def ewm( # note that len(deltas) = len(vals) - 1 and deltas[i] # is to be used in conjunction with vals[i+1] old_wt *= old_wt_factor ** deltas[start + j - 1] + if not adjust and com == 1: + # update alpha "on the fly" for irregular-interval time series + new_wt = 1. - old_wt else: weighted = old_wt_factor * weighted if is_observation: @@ -324,6 +327,9 @@ def ewm_table( # note that len(deltas) = len(vals) - 1 and deltas[i] # is to be used in conjunction with vals[i+1] old_wt[j] *= old_wt_factor ** deltas[i - 1] + if not adjust and com == 1: + # update alpha "on the fly" for irregular-interval time series + new_wt = 1. - old_wt[j] else: weighted[j] = old_wt_factor * weighted[j] if is_observations[j]: From f64d32e9c3ebaafd3c975532d4685c1e06bdb4ef Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Fri, 28 Jun 2024 14:31:55 -0500 Subject: [PATCH 4/7] whatsnew entry for allowing adjust=False with times --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 130ccded72859..138743882bd5a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -48,6 +48,7 @@ Other enhancements - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) +- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: From f1d8a6307a8c942cb7d3f35c318e1ef801871583 Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Fri, 28 Jun 2024 15:01:01 -0500 Subject: [PATCH 5/7] pre-commit style fixes --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/window/ewm.py | 6 ++++-- pandas/core/window/numba_.py | 4 ++-- pandas/tests/window/test_ewm.py | 28 +++++++++++++++++++++------- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 138743882bd5a..816185c428bda 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -42,13 +42,13 @@ Other enhancements - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) +- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) -- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 06b474982f30e..43a3c03b6cef9 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -377,8 +377,10 @@ def __init__( # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(self.com, self.span, self.alpha) > 0: if not self.adjust: - raise NotImplementedError('None of com, span, or alpha can be specified if ' - 'times is provided and adjust=False') + raise NotImplementedError( + "None of com, span, or alpha can be specified if " + "times is provided and adjust=False" + ) self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: self._com = 1.0 diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 4a20ef9c4468b..2a0a5d8391ce2 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -151,7 +151,7 @@ def ewm( old_wt *= old_wt_factor ** deltas[start + j - 1] if not adjust and com == 1: # update alpha "on the fly" for irregular-interval time series - new_wt = 1. - old_wt + new_wt = 1.0 - old_wt else: weighted = old_wt_factor * weighted if is_observation: @@ -329,7 +329,7 @@ def ewm_table( old_wt[j] *= old_wt_factor ** deltas[i - 1] if not adjust and com == 1: # update alpha "on the fly" for irregular-interval time series - new_wt = 1. - old_wt[j] + new_wt = 1.0 - old_wt[j] else: weighted[j] = old_wt_factor * weighted[j] if is_observations[j]: diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 8023a7fa99966..a68afc2dd8502 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -103,7 +103,7 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit, adjust): - #GH 54328 + # GH 54328 tz = tz_aware_fixture halflife = "23 days" times = ( @@ -155,30 +155,44 @@ def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na): def test_ewma_times_adjust_false_with_disallowed_com(): # GH 54328 with pytest.raises( - NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' + NotImplementedError, + match="None of com, span, or alpha can be specified if times is provided and adjust=False", ): Series(range(1)).ewm( - 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), halflife='1D' + 0.1, + adjust=False, + times=date_range("2000", freq="D", periods=1), + halflife="1D", ) def test_ewma_times_adjust_false_with_disallowed_alpha(): # GH 54328 with pytest.raises( - NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' + NotImplementedError, + match="None of com, span, or alpha can be specified if times is provided and adjust=False", ): Series(range(1)).ewm( - 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), alpha=0.5, halflife='1D' + 0.1, + adjust=False, + times=date_range("2000", freq="D", periods=1), + alpha=0.5, + halflife="1D", ) def test_ewma_times_adjust_false_with_disallowed_span(): # GH 54328 with pytest.raises( - NotImplementedError, match='None of com, span, or alpha can be specified if times is provided and adjust=False' + NotImplementedError, + match="None of com, span, or alpha can be specified if times is provided and adjust=False", ): Series(range(1)).ewm( - 0.1, adjust=False, times=date_range("2000", freq="D", periods=1), span=10, halflife='1D' + 0.1, + adjust=False, + times=date_range("2000", freq="D", periods=1), + span=10, + halflife="1D", ) From 33f2054dd52f93f6956d341a581d7558f1b70d6a Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Tue, 2 Jul 2024 11:53:58 -0500 Subject: [PATCH 6/7] reduce line lengths to comply with pre-commit --- pandas/core/window/numba_.py | 4 ++-- pandas/tests/window/test_ewm.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 2a0a5d8391ce2..171d3bc1d1c35 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -150,7 +150,7 @@ def ewm( # is to be used in conjunction with vals[i+1] old_wt *= old_wt_factor ** deltas[start + j - 1] if not adjust and com == 1: - # update alpha "on the fly" for irregular-interval time series + # update in case of irregular-interval time series new_wt = 1.0 - old_wt else: weighted = old_wt_factor * weighted @@ -328,7 +328,7 @@ def ewm_table( # is to be used in conjunction with vals[i+1] old_wt[j] *= old_wt_factor ** deltas[i - 1] if not adjust and com == 1: - # update alpha "on the fly" for irregular-interval time series + # update in case of irregular-interval time series new_wt = 1.0 - old_wt[j] else: weighted[j] = old_wt_factor * weighted[j] diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index a68afc2dd8502..02646bd3399d4 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -156,7 +156,8 @@ def test_ewma_times_adjust_false_with_disallowed_com(): # GH 54328 with pytest.raises( NotImplementedError, - match="None of com, span, or alpha can be specified if times is provided and adjust=False", + match=("None of com, span, or alpha can be specified " + "if times is provided and adjust=False"), ): Series(range(1)).ewm( 0.1, @@ -170,7 +171,8 @@ def test_ewma_times_adjust_false_with_disallowed_alpha(): # GH 54328 with pytest.raises( NotImplementedError, - match="None of com, span, or alpha can be specified if times is provided and adjust=False", + match=("None of com, span, or alpha can be specified " + "if times is provided and adjust=False"), ): Series(range(1)).ewm( 0.1, @@ -185,7 +187,8 @@ def test_ewma_times_adjust_false_with_disallowed_span(): # GH 54328 with pytest.raises( NotImplementedError, - match="None of com, span, or alpha can be specified if times is provided and adjust=False", + match=("None of com, span, or alpha can be specified " + "if times is provided and adjust=False"), ): Series(range(1)).ewm( 0.1, From 5ca8b17c2bdb7077a74312883aeba6fcdebd29bd Mon Sep 17 00:00:00 2001 From: Trevor Serrao Date: Tue, 2 Jul 2024 12:16:20 -0500 Subject: [PATCH 7/7] reduce line lengths and apply ruff-reformat changes --- pandas/_libs/window/aggregations.pyx | 2 +- pandas/tests/window/test_ewm.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 139c2cf18d5f0..5b9ee095d4643 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1814,7 +1814,7 @@ def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, # avoid numerical errors on constant series if weighted != cur: if not adjust and com == 1: - # update alpha "on the fly" for irregular-interval time series + # update in case of irregular-interval series new_wt = 1. - old_wt weighted = old_wt * weighted + new_wt * cur weighted /= (old_wt + new_wt) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 02646bd3399d4..4ea6c805a2ee4 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -156,8 +156,10 @@ def test_ewma_times_adjust_false_with_disallowed_com(): # GH 54328 with pytest.raises( NotImplementedError, - match=("None of com, span, or alpha can be specified " - "if times is provided and adjust=False"), + match=( + "None of com, span, or alpha can be specified " + "if times is provided and adjust=False" + ), ): Series(range(1)).ewm( 0.1, @@ -171,8 +173,10 @@ def test_ewma_times_adjust_false_with_disallowed_alpha(): # GH 54328 with pytest.raises( NotImplementedError, - match=("None of com, span, or alpha can be specified " - "if times is provided and adjust=False"), + match=( + "None of com, span, or alpha can be specified " + "if times is provided and adjust=False" + ), ): Series(range(1)).ewm( 0.1, @@ -187,8 +191,10 @@ def test_ewma_times_adjust_false_with_disallowed_span(): # GH 54328 with pytest.raises( NotImplementedError, - match=("None of com, span, or alpha can be specified " - "if times is provided and adjust=False"), + match=( + "None of com, span, or alpha can be specified " + "if times is provided and adjust=False" + ), ): Series(range(1)).ewm( 0.1,