From 1c8ed5c2d444eece3c0cbfba4813148b6bba5bbf Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 08:41:43 -0800 Subject: [PATCH 1/4] BUG: support non-nano times in ewm --- doc/source/whatsnew/v2.2.0.rst | 2 ++ pandas/core/window/ewm.py | 17 ++++++++++++----- pandas/tests/window/test_ewm.py | 24 ------------------------ 3 files changed, 14 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8cb4b3f24d435..22e24b5162321 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -566,6 +566,8 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` could result in incorrect sorting if the columns of the DataFrame or name of the Series are integers (:issue:`55951`) - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would not respect ``sort=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`55951`) - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`) +- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`??`) +- Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index db659713c6f16..3d20a6719428f 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -12,13 +12,15 @@ from pandas.util._decorators import doc from pandas.core.dtypes.common import ( - is_datetime64_ns_dtype, + is_datetime64_dtype, is_numeric_dtype, ) +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import common +from pandas.core.arrays.datetimelike import dtype_to_unit from pandas.core.indexers.objects import ( BaseIndexer, ExponentialMovingWindowIndexer, @@ -56,6 +58,7 @@ from pandas._typing import ( Axis, TimedeltaConvertibleTypes, + npt, ) from pandas import ( @@ -101,7 +104,7 @@ def get_center_of_mass( def _calculate_deltas( times: np.ndarray | NDFrame, halflife: float | TimedeltaConvertibleTypes | None, -) -> np.ndarray: +) -> npt.NDArray[np.float64]: """ Return the diff of the times divided by the half-life. These values are used in the calculation of the ewm mean. @@ -119,11 +122,11 @@ def _calculate_deltas( np.ndarray Diff of the times divided by the half-life """ + unit = dtype_to_unit(times.dtype) if isinstance(times, ABCSeries): times = times._values _times = np.asarray(times.view(np.int64), dtype=np.float64) - # TODO: generalize to non-nano? - _halflife = float(Timedelta(halflife).as_unit("ns")._value) + _halflife = float(Timedelta(halflife).as_unit(unit)._value) return np.diff(_times) / _halflife @@ -366,7 +369,11 @@ def __init__( if self.times is not None: if not self.adjust: raise NotImplementedError("times is not supported with adjust=False.") - if not is_datetime64_ns_dtype(self.times): + times_dtype = getattr(self.times, "dtype", None) + if not ( + is_datetime64_dtype(times_dtype) + or isinstance(times_dtype, DatetimeTZDtype) + ): raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 427780db79783..9a4a2274fd430 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -102,30 +102,6 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "unit", - [ - pytest.param( - "s", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - pytest.param( - "ms", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - pytest.param( - "us", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - "ns", - ], -) def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit): tz = tz_aware_fixture halflife = "23 days" From aa2f6e68cb6fe00f46ab5fe0497a2cff285046de Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 08:42:33 -0800 Subject: [PATCH 2/4] GH ref --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 22e24b5162321..30a0f0f08e2a6 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -560,13 +560,13 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` would not retain :class:`.Categorical` dtype when the index was a :class:`.CategoricalIndex` that contained NA values (:issue:`54234`) - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when ``observed=False`` and ``f="idxmin"`` or ``f="idxmax"`` would incorrectly raise on unobserved categories (:issue:`54234`) - Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`) +- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`) - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`) - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`) - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`) - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` could result in incorrect sorting if the columns of the DataFrame or name of the Series are integers (:issue:`55951`) - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would not respect ``sort=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`55951`) - Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_count` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`) -- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`??`) - Reshaping From f7f36673b512bb933f3cd6fb49e76fbb8ce9a25f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 13:56:15 -0800 Subject: [PATCH 3/4] update exception message --- pandas/core/window/ewm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 3d20a6719428f..9ebf32d3e536e 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -374,7 +374,7 @@ def __init__( is_datetime64_dtype(times_dtype) or isinstance(times_dtype, DatetimeTZDtype) ): - raise ValueError("times must be datetime64[ns] dtype.") + raise ValueError("times must be datetime64 dtype.") if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): From d53de72d2702aca1a99e83128040b5eaca71174e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 13:56:37 -0800 Subject: [PATCH 4/4] update test --- pandas/tests/window/test_ewm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 9a4a2274fd430..058e5ce36e53e 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -60,7 +60,7 @@ def test_constructor(frame_or_series): def test_ewma_times_not_datetime_type(): - msg = r"times must be datetime64\[ns\] dtype." + msg = r"times must be datetime64 dtype." with pytest.raises(ValueError, match=msg): Series(range(5)).ewm(times=np.arange(5))