Skip to content

BUG: Setting DTI/TDI freq affecting other indexes viewing the same data #33552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 102 additions & 31 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Base and utility classes for tseries type pandas objects.
"""
from datetime import datetime
from datetime import datetime, timedelta
from typing import Any, List, Optional, Union, cast

import numpy as np
Expand All @@ -17,14 +17,18 @@
ensure_int64,
ensure_platform_int,
is_bool_dtype,
is_datetime64_any_dtype,
is_dtype_equal,
is_integer,
is_list_like,
is_object_dtype,
is_period_dtype,
is_scalar,
is_timedelta64_dtype,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import algorithms
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
Expand All @@ -41,7 +45,8 @@
from pandas.core.ops import get_op_result_name
from pandas.core.tools.timedeltas import to_timedelta

from pandas.tseries.frequencies import DateOffset
from pandas.tseries.frequencies import DateOffset, to_offset
from pandas.tseries.offsets import Tick

_index_doc_kwargs = dict(ibase._index_doc_kwargs)

Expand Down Expand Up @@ -72,13 +77,33 @@ def wrapper(left, right):
return wrapper


def _make_wrapped_arith_op_with_freq(opname: str):
    """
    Build an arithmetic method that dispatches to the underlying
    ExtensionArray and then sets the frequency of the result.

    The operation itself is delegated to the method produced by
    ``make_wrapped_arith_op(opname)``; the frequency stored on the result's
    ``_freq`` attribute comes from ``self._get_addsub_freq(other)``.
    """
    dispatch = make_wrapped_arith_op(opname)

    def wrapped(self, other):
        outcome = dispatch(self, other)
        if outcome is not NotImplemented:
            # Only an actual result gets a freq; NotImplemented is handed
            # back untouched.
            outcome._freq = self._get_addsub_freq(other)
        return outcome

    wrapped.__name__ = opname
    return wrapped


@inherit_names(
["inferred_freq", "_isnan", "_resolution", "resolution"],
DatetimeLikeArrayMixin,
cache=True,
)
@inherit_names(
["mean", "freq", "freqstr", "asi8", "_box_func"], DatetimeLikeArrayMixin,
["mean", "asi8", "_box_func"], DatetimeLikeArrayMixin,
)
class DatetimeIndexOpsMixin(ExtensionIndex):
"""
Expand Down Expand Up @@ -446,10 +471,45 @@ def get_indexer_non_unique(self, target):
return ensure_platform_int(indexer), missing

# --------------------------------------------------------------------
# Arithmetic Methods

def _get_addsub_freq(self, other) -> Optional[DateOffset]:
    """
    Determine the freq expected for the result of adding or subtracting
    ``other``.

    Returns either ``self.freq`` or None. Raises NotImplementedError when
    ``other`` matches none of the cases handled below.
    """
    if is_period_dtype(self.dtype):
        # Only used for ops that stay PeriodDtype
        return self.freq
    if self.freq is None:
        return None
    if lib.is_scalar(other) and isna(other):
        return None

    if isinstance(other, (Tick, timedelta, np.timedelta64)):
        # A Tick freq is kept; any other freq is dropped.
        return self.freq if isinstance(self.freq, Tick) else None

    # NOTE: this check must stay after the Tick check above, which
    # already returned for Tick offsets.
    if isinstance(other, DateOffset):
        # otherwise just DatetimeArray
        # TODO: Should we infer if it matches self.freq * n?
        return None
    if isinstance(other, (datetime, np.datetime64)):
        return self.freq

    # TODO: for timedelta64 / datetime64 dtypes, shouldn't we be able to
    #  do self.freq + other.freq?
    # TODO: is None quite right for object dtype? sometimes we unpack
    #  singletons
    if (
        is_timedelta64_dtype(other)
        or is_object_dtype(other)
        or is_datetime64_any_dtype(other)
    ):
        return None

    raise NotImplementedError

__add__ = make_wrapped_arith_op("__add__")
__add__ = _make_wrapped_arith_op_with_freq("__add__")
__sub__ = _make_wrapped_arith_op_with_freq("__sub__")
__radd__ = make_wrapped_arith_op("__radd__")
__sub__ = make_wrapped_arith_op("__sub__")
__rsub__ = make_wrapped_arith_op("__rsub__")
__pow__ = make_wrapped_arith_op("__pow__")
__rpow__ = make_wrapped_arith_op("__rpow__")
Expand Down Expand Up @@ -558,7 +618,9 @@ def shift(self, periods=1, freq=None):
Index.shift : Shift values of Index.
PeriodIndex.shift : Shift values of PeriodIndex.
"""
result = self._data._time_shift(periods, freq=freq)
arr = self._data.view()
arr._freq = self.freq
result = arr._time_shift(periods, freq=freq)
return type(self)(result, name=self.name)

# --------------------------------------------------------------------
Expand Down Expand Up @@ -610,21 +672,40 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
_is_unique = Index.is_unique
_freq = lib.no_default

def _set_freq(self, freq):
@property
def freq(self):
"""
In limited circumstances, our freq may differ from that of our _data.
"""
Set the _freq attribute on our underlying DatetimeArray.
if self._freq is not lib.no_default:
return self._freq
return self._data.freq

Parameters
----------
freq : DateOffset, None, or "infer"
@property
def freqstr(self):
"""
Return the frequency object as a string if it's set, otherwise None.
"""
# GH#29843
self._data._with_freq(freq)
if self.freq is None:
return None
return self.freq.freqstr

def _with_freq(self, freq):
index = self.copy(deep=False)
index._set_freq(freq)
if freq is None:
# Even if we _can_ have a freq, we might want to set it to None
index._freq = None
elif len(self) == 0 and isinstance(freq, DateOffset):
# Always valid. In the TimedeltaArray case, we assume this
# is a Tick offset.
index._freq = freq
else:
assert freq == "infer", freq
freq = to_offset(self.inferred_freq)
index._freq = freq

return index

def _shallow_copy(self, values=None, name: Label = lib.no_default):
Expand All @@ -647,8 +728,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default):

@Appender(Index.difference.__doc__)
def difference(self, other, sort=None):
new_idx = super().difference(other, sort=sort)
new_idx._set_freq(None)
new_idx = super().difference(other, sort=sort)._with_freq(None)
return new_idx

def intersection(self, other, sort=False):
Expand Down Expand Up @@ -693,7 +773,7 @@ def intersection(self, other, sort=False):
result = Index.intersection(self, other, sort=sort)
if isinstance(result, type(self)):
if result.freq is None:
result._set_freq("infer")
result = result._with_freq("infer")
return result

elif (
Expand All @@ -704,14 +784,7 @@ def intersection(self, other, sort=False):
or (not self.is_monotonic or not other.is_monotonic)
):
result = Index.intersection(self, other, sort=sort)

# Invalidate the freq of `result`, which may not be correct at
# this point, depending on the values.

result._set_freq(None)
result = self._shallow_copy(result._data, name=result.name)
if result.freq is None:
result._set_freq("infer")
result = result._with_freq("infer")
return result

# to make our life easier, "sort" the two ranges
Expand Down Expand Up @@ -781,10 +854,9 @@ def _fast_union(self, other, sort=None):
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right._values[:loc]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
dates = concat_compat((left._values, right_chunk))
# TODO: can we infer that it has self.freq?
result = self._shallow_copy(dates)._with_freq("infer")
return result
else:
left, right = other, self
Expand All @@ -797,9 +869,8 @@ def _fast_union(self, other, sort=None):
loc = right.searchsorted(left_end, side="right")
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
result = self._shallow_copy(dates)._with_freq("infer")
return result
else:
return left
Expand All @@ -816,7 +887,7 @@ def _union(self, other, sort):
if this._can_fast_union(other):
result = this._fast_union(other, sort=sort)
if result.freq is None:
result._set_freq("infer")
result = result._with_freq("infer")
return result
else:
i8self = Int64Index._simple_new(self.asi8, name=self.name)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _new_PeriodIndex(cls, **d):
PeriodArray,
wrap=True,
)
@inherit_names(["is_leap_year", "freq", "_format_native_types"], PeriodArray)
@inherit_names(["is_leap_year", "freq", "freqstr", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
"""
Immutable ndarray holding ordinal values indicating regular periods in time.
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,8 @@ def _downsample(self, how, **kwargs):
if not len(ax):
# reset to the new freq
obj = obj.copy()
obj.index._set_freq(self.freq)
obj.index = obj.index._with_freq(self.freq)
assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
return obj

# do we have a regular frequency
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2052,7 +2052,7 @@ def test_dti_add_tdi(self, tz_naive_fixture):
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
tdi = pd.timedelta_range("0 days", periods=10)
expected = pd.date_range("2017-01-01", periods=10, tz=tz)
expected._set_freq(None)
expected = expected._with_freq(None)

# add with TimedeltaIndex
result = dti + tdi
Expand All @@ -2074,7 +2074,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture):
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
tdi = pd.timedelta_range("0 days", periods=10)
expected = pd.date_range("2017-01-01", periods=10, tz=tz)
expected._set_freq(None)
expected = expected._with_freq(None)

# iadd with TimedeltaIndex
result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ def test_tda_add_sub_index(self):
def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture):
# Result should be cast back to DatetimeArray
dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
dti._set_freq(None)
dti = dti._with_freq(None)
tdi = dti - dti

obj = tm.box_expected(tdi, box_df_fail)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def test_ensure_copied_data(self, indices):
if is_datetime64tz_dtype(indices.dtype):
result = result.tz_localize("UTC").tz_convert(indices.tz)
if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
indices._set_freq(None)
indices = indices._with_freq(None)

tm.assert_index_equal(indices, result)

Expand Down Expand Up @@ -397,7 +397,7 @@ def test_where(self, klass):
i = self.create_index()
if isinstance(i, (pd.DatetimeIndex, pd.TimedeltaIndex)):
# where does not preserve freq
i._set_freq(None)
i = i._with_freq(None)

cond = [True] * len(i)
result = i.where(klass(cond))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_map_dictlike(self, mapper):

# don't compare the freqs
if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)):
expected._set_freq(None)
expected = expected._with_freq(None)

result = index.map(mapper(expected, index))
tm.assert_index_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_construction_with_alt(self, kwargs, tz_aware_fixture):
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
tz = tz_aware_fixture
i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
i._set_freq(None)
i = i._with_freq(None)
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

if "tz" in kwargs:
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/indexes/datetimes/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,5 @@ def test_split_non_utc(self):
# GH 14042
indices = pd.date_range("2016-01-01 00:00:00+0200", freq="S", periods=10)
result = np.split(indices, indices_or_sections=[])[0]
expected = indices.copy()
expected._set_freq(None)
expected = indices._with_freq(None)
tm.assert_index_equal(result, expected)
16 changes: 15 additions & 1 deletion pandas/tests/indexes/datetimes/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_value_counts_unique(self, tz_naive_fixture):

exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
expected.index._set_freq(None)
expected.index = expected.index._with_freq(None)

for obj in [idx, Series(idx)]:

Expand Down Expand Up @@ -406,6 +406,20 @@ def test_freq_setter_errors(self):
with pytest.raises(ValueError, match="Invalid frequency"):
idx._data.freq = "foo"

def test_freq_view_safe(self):
    # Setting the freq for one DatetimeIndex shouldn't alter the freq
    # for another that views the same data
    original = pd.date_range("2016-01-01", periods=5)
    shared_values = original._data

    freqless = DatetimeIndex(shared_values)._with_freq(None)
    assert freqless.freq is None

    # Original was not altered
    assert original.freq == "D"
    assert shared_values.freq == "D"


class TestBusinessDatetimeIndex:
def setup_method(self, method):
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,7 @@ def test_intersection(self, tz, sort):
]:
result = base.intersection(rng)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz

# non-monotonic
base = DatetimeIndex(
Expand All @@ -255,6 +253,7 @@ def test_intersection(self, tz, sort):
# GH 7880
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
expected4 = DatetimeIndex([], tz=tz, name="idx")
assert expected4.freq is None

for (rng, expected) in [
(rng2, expected2),
Expand All @@ -265,9 +264,7 @@ def test_intersection(self, tz, sort):
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
assert result.tz == expected.tz

# parametrize over both anchored and non-anchored freqs, as they
# have different code paths
Expand Down
Loading