Skip to content

BUG: support non-nano times in ewm #56262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -561,12 +561,14 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` would not retain :class:`.Categorical` dtype when the index was a :class:`.CategoricalIndex` that contained NA values (:issue:`54234`)
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when ``observed=False`` and ``f="idxmin"`` or ``f="idxmax"`` would incorrectly raise on unobserved categories (:issue:`54234`)
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
- Bug in :meth:`DataFrame.ewm` raising ``ValueError`` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` could result in incorrect sorting if the columns of the DataFrame or name of the Series are integers (:issue:`55951`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` would not respect ``sort=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`55951`)
- Bug in :meth:`DataFrameGroupBy.value_counts` and :meth:`SeriesGroupBy.value_counts` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`)
-

Reshaping
^^^^^^^^^
Expand Down
19 changes: 13 additions & 6 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
from pandas.util._decorators import doc

from pandas.core.dtypes.common import (
is_datetime64_ns_dtype,
is_datetime64_dtype,
is_numeric_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import common
from pandas.core.arrays.datetimelike import dtype_to_unit
from pandas.core.indexers.objects import (
BaseIndexer,
ExponentialMovingWindowIndexer,
Expand Down Expand Up @@ -56,6 +58,7 @@
from pandas._typing import (
Axis,
TimedeltaConvertibleTypes,
npt,
)

from pandas import (
Expand Down Expand Up @@ -101,7 +104,7 @@ def get_center_of_mass(
def _calculate_deltas(
times: np.ndarray | NDFrame,
halflife: float | TimedeltaConvertibleTypes | None,
) -> np.ndarray:
) -> npt.NDArray[np.float64]:
"""
Return the diff of the times divided by the half-life. These values are used in
the calculation of the ewm mean.
Expand All @@ -119,11 +122,11 @@ def _calculate_deltas(
np.ndarray
Diff of the times divided by the half-life
"""
unit = dtype_to_unit(times.dtype)
if isinstance(times, ABCSeries):
times = times._values
_times = np.asarray(times.view(np.int64), dtype=np.float64)
# TODO: generalize to non-nano?
_halflife = float(Timedelta(halflife).as_unit("ns")._value)
_halflife = float(Timedelta(halflife).as_unit(unit)._value)
return np.diff(_times) / _halflife


Expand Down Expand Up @@ -366,8 +369,12 @@ def __init__(
if self.times is not None:
if not self.adjust:
raise NotImplementedError("times is not supported with adjust=False.")
if not is_datetime64_ns_dtype(self.times):
raise ValueError("times must be datetime64[ns] dtype.")
times_dtype = getattr(self.times, "dtype", None)
if not (
is_datetime64_dtype(times_dtype)
or isinstance(times_dtype, DatetimeTZDtype)
):
raise ValueError("times must be datetime64 dtype.")
if len(self.times) != len(obj):
raise ValueError("times must be the same length as the object.")
if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)):
Expand Down
26 changes: 1 addition & 25 deletions pandas/tests/window/test_ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_constructor(frame_or_series):


def test_ewma_times_not_datetime_type():
msg = r"times must be datetime64\[ns\] dtype."
msg = r"times must be datetime64 dtype."
with pytest.raises(ValueError, match=msg):
Series(range(5)).ewm(times=np.arange(5))

Expand Down Expand Up @@ -102,30 +102,6 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"unit",
[
pytest.param(
"s",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
pytest.param(
"ms",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
pytest.param(
"us",
marks=pytest.mark.xfail(
reason="ExponentialMovingWindow constructor raises on non-nano"
),
),
"ns",
],
)
def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
tz = tz_aware_fixture
halflife = "23 days"
Expand Down