diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cf653a6875a9c..a1967c939092f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -1,7 +1,7 @@ """ Base and utility classes for tseries type pandas objects. """ -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, List, Optional, Union, cast import numpy as np @@ -17,14 +17,18 @@ ensure_int64, ensure_platform_int, is_bool_dtype, + is_datetime64_any_dtype, is_dtype_equal, is_integer, is_list_like, + is_object_dtype, is_period_dtype, is_scalar, + is_timedelta64_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray @@ -41,7 +45,8 @@ from pandas.core.ops import get_op_result_name from pandas.core.tools.timedeltas import to_timedelta -from pandas.tseries.frequencies import DateOffset +from pandas.tseries.frequencies import DateOffset, to_offset +from pandas.tseries.offsets import Tick _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -72,13 +77,33 @@ def wrapper(left, right): return wrapper +def _make_wrapped_arith_op_with_freq(opname: str): + """ + Dispatch the operation to the underlying ExtensionArray, and infer + the appropriate frequency for the result. + """ + meth = make_wrapped_arith_op(opname) + + def wrapped(self, other): + result = meth(self, other) + if result is NotImplemented: + return NotImplemented + + new_freq = self._get_addsub_freq(other) + result._freq = new_freq + return result + + wrapped.__name__ = opname + return wrapped + + @inherit_names( ["inferred_freq", "_isnan", "_resolution", "resolution"], DatetimeLikeArrayMixin, cache=True, ) @inherit_names( - ["mean", "freq", "freqstr", "asi8", "_box_func"], DatetimeLikeArrayMixin, + ["mean", "asi8", "_box_func"], DatetimeLikeArrayMixin, ) class DatetimeIndexOpsMixin(ExtensionIndex): """ @@ -446,10 +471,45 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing # -------------------------------------------------------------------- + # Arithmetic Methods + + def _get_addsub_freq(self, other) -> Optional[DateOffset]: + """ + Find the freq we expect the result of an addition/subtraction operation + to have. + """ + if is_period_dtype(self.dtype): + # Only used for ops that stay PeriodDtype + return self.freq + elif self.freq is None: + return None + elif lib.is_scalar(other) and isna(other): + return None + + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + new_freq = None + if isinstance(self.freq, Tick): + new_freq = self.freq + return new_freq + + elif isinstance(other, DateOffset): + # otherwise just DatetimeArray + return None # TODO: Should we infer if it matches self.freq * n? + elif isinstance(other, (datetime, np.datetime64)): + return self.freq + + elif is_timedelta64_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? + elif is_object_dtype(other): + return None # TODO: is this quite right? sometimes we unpack singletons + elif is_datetime64_any_dtype(other): + return None # TODO: shouldnt we be able to do self.freq + other.freq? + else: + raise NotImplementedError - __add__ = make_wrapped_arith_op("__add__") + __add__ = _make_wrapped_arith_op_with_freq("__add__") + __sub__ = _make_wrapped_arith_op_with_freq("__sub__") __radd__ = make_wrapped_arith_op("__radd__") - __sub__ = make_wrapped_arith_op("__sub__") __rsub__ = make_wrapped_arith_op("__rsub__") __pow__ = make_wrapped_arith_op("__pow__") __rpow__ = make_wrapped_arith_op("__rpow__") @@ -558,7 +618,9 @@ def shift(self, periods=1, freq=None): Index.shift : Shift values of Index. PeriodIndex.shift : Shift values of PeriodIndex. """ - result = self._data._time_shift(periods, freq=freq) + arr = self._data.view() + arr._freq = self.freq + result = arr._time_shift(periods, freq=freq) return type(self)(result, name=self.name) # -------------------------------------------------------------------- @@ -610,21 +672,40 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + _freq = lib.no_default - def _set_freq(self, freq): + @property + def freq(self): + """ + In limited circumstances, our freq may differ from that of our _data. """ - Set the _freq attribute on our underlying DatetimeArray. + if self._freq is not lib.no_default: + return self._freq + return self._data.freq - Parameters - ---------- - freq : DateOffset, None, or "infer" + @property + def freqstr(self): + """ + Return the frequency object as a string if its set, otherwise None. """ - # GH#29843 - self._data._with_freq(freq) + if self.freq is None: + return None + return self.freq.freqstr def _with_freq(self, freq): index = self.copy(deep=False) - index._set_freq(freq) + if freq is None: + # Even if we _can_ have a freq, we might want to set it to None + index._freq = None + elif len(self) == 0 and isinstance(freq, DateOffset): + # Always valid. In the TimedeltaArray case, we assume this + # is a Tick offset. + index._freq = freq + else: + assert freq == "infer", freq + freq = to_offset(self.inferred_freq) + index._freq = freq + return index def _shallow_copy(self, values=None, name: Label = lib.no_default): @@ -647,8 +728,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): @Appender(Index.difference.__doc__) def difference(self, other, sort=None): - new_idx = super().difference(other, sort=sort) - new_idx._set_freq(None) + new_idx = super().difference(other, sort=sort)._with_freq(None) return new_idx def intersection(self, other, sort=False): @@ -693,7 +773,7 @@ def intersection(self, other, sort=False): result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result elif ( @@ -704,14 +784,7 @@ def intersection(self, other, sort=False): or (not self.is_monotonic or not other.is_monotonic) ): result = Index.intersection(self, other, sort=sort) - - # Invalidate the freq of `result`, which may not be correct at - # this point, depending on the values. - - result._set_freq(None) - result = self._shallow_copy(result._data, name=result.name) - if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result # to make our life easier, "sort" the two ranges @@ -781,10 +854,9 @@ def _fast_union(self, other, sort=None): left_start = left[0] loc = right.searchsorted(left_start, side="left") right_chunk = right._values[:loc] - dates = concat_compat([left._values, right_chunk]) - result = self._shallow_copy(dates) - result._set_freq("infer") + dates = concat_compat((left._values, right_chunk)) # TODO: can we infer that it has self.freq? + result = self._shallow_copy(dates)._with_freq("infer") return result else: left, right = other, self @@ -797,9 +869,8 @@ def _fast_union(self, other, sort=None): loc = right.searchsorted(left_end, side="right") right_chunk = right._values[loc:] dates = concat_compat([left._values, right_chunk]) - result = self._shallow_copy(dates) - result._set_freq("infer") # TODO: can we infer that it has self.freq? + result = self._shallow_copy(dates)._with_freq("infer") return result else: return left @@ -816,7 +887,7 @@ def _union(self, other, sort): if this._can_fast_union(other): result = this._fast_union(other, sort=sort) if result.freq is None: - result._set_freq("infer") + result = result._with_freq("infer") return result else: i8self = Int64Index._simple_new(self.asi8, name=self.name) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 957c01c2dca96..d3c49c5ed0796 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -71,7 +71,7 @@ def _new_PeriodIndex(cls, **d): PeriodArray, wrap=True, ) -@inherit_names(["is_leap_year", "freq", "_format_native_types"], PeriodArray) +@inherit_names(["is_leap_year", "freq", "freqstr", "_format_native_types"], PeriodArray) class PeriodIndex(DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in time. diff --git a/pandas/core/resample.py b/pandas/core/resample.py index bfef4f63e2e8a..06751d9c35fab 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1017,7 +1017,8 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq obj = obj.copy() - obj.index._set_freq(self.freq) + obj.index = obj.index._with_freq(self.freq) + assert obj.index.freq == self.freq, (obj.index.freq, self.freq) return obj # do we have a regular frequency diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 79fcb5e9478c3..912ce2a953e0a 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2052,7 +2052,7 @@ def test_dti_add_tdi(self, tz_naive_fixture): dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) tdi = pd.timedelta_range("0 days", periods=10) expected = pd.date_range("2017-01-01", periods=10, tz=tz) - expected._set_freq(None) + expected = expected._with_freq(None) # add with TimdeltaIndex result = dti + tdi @@ -2074,7 +2074,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture): dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) tdi = pd.timedelta_range("0 days", periods=10) expected = pd.date_range("2017-01-01", periods=10, tz=tz) - expected._set_freq(None) + expected = expected._with_freq(None) # iadd with TimdeltaIndex result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 3ffdc87ff84c8..9378a70044d83 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -544,7 +544,7 @@ def test_tda_add_sub_index(self): def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture): # Result should be cast back to DatetimeArray dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture) - dti._set_freq(None) + dti = dti._with_freq(None) tdi = dti - dti obj = tm.box_expected(tdi, box_df_fail) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 957ca138498d9..52b82b36d13be 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -267,7 +267,7 @@ def test_ensure_copied_data(self, indices): if is_datetime64tz_dtype(indices.dtype): result = result.tz_localize("UTC").tz_convert(indices.tz) if isinstance(indices, (DatetimeIndex, TimedeltaIndex)): - indices._set_freq(None) + indices = indices._with_freq(None) tm.assert_index_equal(indices, result) @@ -397,7 +397,7 @@ def test_where(self, klass): i = self.create_index() if isinstance(i, (pd.DatetimeIndex, pd.TimedeltaIndex)): # where does not preserve freq - i._set_freq(None) + i = i._with_freq(None) cond = [True] * len(i) result = i.where(klass(cond)) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 944358b1540b0..dfefdc0f211b1 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -82,7 +82,7 @@ def test_map_dictlike(self, mapper): # don't compare the freqs if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)): - expected._set_freq(None) + expected = expected._with_freq(None) result = index.map(mapper(expected, index)) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index a8e08bbe9a2e9..691f542fc2084 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -131,7 +131,7 @@ def test_construction_with_alt(self, kwargs, tz_aware_fixture): def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): tz = tz_aware_fixture i = pd.date_range("20130101", periods=5, freq="H", tz=tz) - i._set_freq(None) + i = i._with_freq(None) kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} if "tz" in kwargs: diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 08706dce7e1e0..81fa1a27ac911 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -463,6 +463,5 @@ def test_split_non_utc(self): # GH 14042 indices = pd.date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) result = np.split(indices, indices_or_sections=[])[0] - expected = indices.copy() - expected._set_freq(None) + expected = indices._with_freq(None) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index f0fe5e9b293fc..603a0a452391c 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -134,7 +134,7 @@ def test_value_counts_unique(self, tz_naive_fixture): exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - expected.index._set_freq(None) + expected.index = expected.index._with_freq(None) for obj in [idx, Series(idx)]: @@ -406,6 +406,20 @@ def test_freq_setter_errors(self): with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" + def test_freq_view_safe(self): + # Setting the freq for one DatetimeIndex shouldn't alter the freq + # for another that views the same data + + dti = pd.date_range("2016-01-01", periods=5) + dta = dti._data + + dti2 = DatetimeIndex(dta)._with_freq(None) + assert dti2.freq is None + + # Original was not altered + assert dti.freq == "D" + assert dta.freq == "D" + class TestBusinessDatetimeIndex: def setup_method(self, method): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index df6e2dac72f95..0473ecf9de24d 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -231,9 +231,7 @@ def test_intersection(self, tz, sort): ]: result = base.intersection(rng) tm.assert_index_equal(result, expected) - assert result.name == expected.name assert result.freq == expected.freq - assert result.tz == expected.tz # non-monotonic base = DatetimeIndex( @@ -255,6 +253,7 @@ def test_intersection(self, tz, sort): # GH 7880 rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") expected4 = DatetimeIndex([], tz=tz, name="idx") + assert expected4.freq is None for (rng, expected) in [ (rng2, expected2), @@ -265,9 +264,7 @@ def test_intersection(self, tz, sort): if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - assert result.name == expected.name assert result.freq is None - assert result.tz == expected.tz # parametrize over both anchored and non-anchored freqs, as they # have different code paths diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 0e5abe2f5ccd1..19cbd74b31172 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -290,3 +290,17 @@ def test_freq_setter_errors(self): # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" + + def test_freq_view_safe(self): + # Setting the freq for one TimedeltaIndex shouldn't alter the freq + # for another that views the same data + + tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D") + tda = tdi._data + + tdi2 = TimedeltaIndex(tda)._with_freq(None) + assert tdi2.freq is None + + # Original was not altered + assert tdi.freq == "2D" + assert tda.freq == "2D" diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index f724badd51da8..637a2629dda8a 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -32,7 +32,9 @@ def indices(self): def create_index(self) -> TimedeltaIndex: index = pd.to_timedelta(range(5), unit="d")._with_freq("infer") assert index.freq == "D" - return index + pd.offsets.Hour(1) + ret = index + pd.offsets.Hour(1) + assert ret.freq == "D" + return ret def test_numeric_compat(self): # Dummy method to override super's version; this test is now done diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e70a06cc5f582..280424c68297f 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -386,7 +386,7 @@ def test_write_index(self, engine): for index in indexes: df.index = index if isinstance(index, pd.DatetimeIndex): - index._set_freq(None) # freq doesnt round-trip + df.index = df.index._with_freq(None) # freq doesnt round-trip check_round_trip(df, engine, check_names=check_names) # index with meta-data @@ -465,7 +465,7 @@ def test_basic(self, pa, df_full): # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") - dti._set_freq(None) # freq doesnt round-trip + dti = dti._with_freq(None) # freq doesnt round-trip df["datetime_tz"] = dti df["bool_with_none"] = [True, None, True] @@ -634,7 +634,7 @@ def test_basic(self, fp, df_full): df = df_full dti = pd.date_range("20130101", periods=3, tz="US/Eastern") - dti._set_freq(None) # freq doesnt round-trip + dti = dti._with_freq(None) # freq doesnt round-trip df["datetime_tz"] = dti df["timedelta"] = pd.timedelta_range("1 day", periods=3) check_round_trip(df, fp) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 16163ee76ba63..c7a04843b8296 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -378,7 +378,7 @@ def test_ser_cmp_result_names(self, names, op): # datetime64tz dtype dti = dti.tz_localize("US/Central") - dti._set_freq("infer") # freq not preserved by tz_localize + dti = pd.DatetimeIndex(dti, freq="infer") # freq not preserved by tz_localize ser = Series(dti).rename(names[1]) result = op(ser, dti) assert result.name == names[2] diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 43461d465b9e7..1ba73292dc0b4 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1505,7 +1505,7 @@ def test_set_index_datetime(self): tz="US/Eastern", ) idx3 = pd.date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") - idx3._set_freq(None) + idx3 = idx3._with_freq(None) df = df.set_index(idx1) df = df.set_index(idx2, append=True)