diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 48a3886c20a3a..da59c635b5a18 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -114,7 +114,18 @@ def apply_index_wraps(func):
     # Note: normally we would use `@functools.wraps(func)`, but this does
     # not play nicely with cython class methods
     def wrapper(self, other):
-        result = func(self, other)
+
+        is_index = getattr(other, "_typ", "") == "datetimeindex"
+
+        # operate on DatetimeArray
+        arr = other._data if is_index else other
+
+        result = func(self, arr)
+
+        if is_index:
+            # Wrap DatetimeArray result back to DatetimeIndex
+            result = type(other)._simple_new(result, name=other.name)
+
         if self.normalize:
             result = result.to_period('D').to_timestamp()
         return result
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index cef8a39d75a4c..23e68802eb126 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3281,13 +3281,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
         target = _ensure_has_len(target)  # target may be an iterator

         if not isinstance(target, Index) and len(target) == 0:
-            attrs = self._get_attributes_dict()
-            attrs.pop("freq", None)  # don't preserve freq
             if isinstance(self, ABCRangeIndex):
                 values = range(0)
             else:
                 values = self._data[:0]  # appropriately-dtyped empty array
-            target = self._simple_new(values, **attrs)
+            target = self._simple_new(values, name=self.name)
         else:
             target = ensure_index(target)

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index bf35c85ac8ed5..054a64bf3f990 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -622,21 +622,11 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default):
         if values is None:
             values = self._data

-        if isinstance(values, type(self)):
-            values = values._data
         if isinstance(values, np.ndarray):
             # TODO: We would rather not get here
             values = type(self._data)(values, dtype=self.dtype)

-        attributes = self._get_attributes_dict()
-
-        if self.freq is not None:
-            if isinstance(values, (DatetimeArray, TimedeltaArray)):
-                if values.freq is None:
-                    del attributes["freq"]
-
-        attributes["name"] = name
-        result = self._simple_new(values, **attributes)
+        result = type(self)._simple_new(values, name=name)
         result._cache = cache
         return result

@@ -780,7 +770,10 @@ def _fast_union(self, other, sort=None):
             loc = right.searchsorted(left_start, side="left")
             right_chunk = right.values[:loc]
             dates = concat_compat((left.values, right_chunk))
-            return self._shallow_copy(dates)
+            result = self._shallow_copy(dates)
+            result._set_freq("infer")
+            # TODO: can we infer that it has self.freq?
+            return result
         else:
             left, right = other, self

@@ -792,7 +785,10 @@ def _fast_union(self, other, sort=None):
             loc = right.searchsorted(left_end, side="right")
             right_chunk = right.values[loc:]
             dates = concat_compat((left.values, right_chunk))
-            return self._shallow_copy(dates)
+            result = self._shallow_copy(dates)
+            result._set_freq("infer")
+            # TODO: can we infer that it has self.freq?
+            return result
         else:
             return left
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index c8035a9de432b..e791133220dbf 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -7,17 +7,13 @@
 from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts
 from pandas._libs.tslibs import fields, parsing, timezones
+from pandas._typing import Label
 from pandas.util._decorators import cache_readonly

 from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
-from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype

-from pandas.core.arrays.datetimes import (
-    DatetimeArray,
-    tz_to_dtype,
-    validate_tz_from_dtype,
-)
+from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
 import pandas.core.common as com
 from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name
 from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
@@ -36,7 +32,20 @@ def _new_DatetimeIndex(cls, d):
     if "data" in d and not isinstance(d["data"], DatetimeIndex):
         # Avoid need to verify integrity by calling simple_new directly
         data = d.pop("data")
-        result = cls._simple_new(data, **d)
+        if not isinstance(data, DatetimeArray):
+            # For backward compat with older pickles, we may need to construct
+            # a DatetimeArray to adapt to the newer _simple_new signature
+            tz = d.pop("tz")
+            freq = d.pop("freq")
+            dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq)
+        else:
+            dta = data
+        for key in ["tz", "freq"]:
+            # These are already stored in our DatetimeArray; if they are
+            # also in the pickle and don't match, we have a problem.
+            if key in d:
+                assert d.pop(key) == getattr(dta, key)
+        result = cls._simple_new(dta, **d)
     else:
         with warnings.catch_warnings():
             # TODO: If we knew what was going in to **d, we might be able to
@@ -244,34 +253,16 @@ def __new__(
         return subarr

     @classmethod
-    def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
-        """
-        We require the we have a dtype compat for the values
-        if we are passed a non-dtype compat, then coerce using the constructor
-        """
-        if isinstance(values, DatetimeArray):
-            if tz:
-                tz = validate_tz_from_dtype(dtype, tz)
-                dtype = DatetimeTZDtype(tz=tz)
-            elif dtype is None:
-                dtype = _NS_DTYPE
-
-            values = DatetimeArray(values, freq=freq, dtype=dtype)
-            tz = values.tz
-            freq = values.freq
-            values = values._data
-
-        dtype = tz_to_dtype(tz)
-        dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype)
-        assert isinstance(dtarr, DatetimeArray)
+    def _simple_new(cls, values: DatetimeArray, name: Label = None):
+        assert isinstance(values, DatetimeArray), type(values)

         result = object.__new__(cls)
-        result._data = dtarr
+        result._data = values
         result.name = name
         result._cache = {}
         result._no_setting_name = False
         # For groupby perf. See note in indexes/base about _index_data
-        result._index_data = dtarr._data
+        result._index_data = values._data
         result._reset_identity()
         return result
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index a6d9d4dfc330b..7a7670b0e7965 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -1,6 +1,7 @@
 """ implement the TimedeltaIndex """

 from pandas._libs import NaT, Timedelta, index as libindex
+from pandas._typing import Label
 from pandas.util._decorators import Appender

 from pandas.core.dtypes.common import (
@@ -154,7 +155,7 @@ def __new__(
         if isinstance(data, TimedeltaArray) and freq is None:
             if copy:
                 data = data.copy()
-            return cls._simple_new(data, name=name, freq=freq)
+            return cls._simple_new(data, name=name)

         if isinstance(data, TimedeltaIndex) and freq is None and name is None:
             if copy:
@@ -170,12 +171,8 @@ def __new__(
         return cls._simple_new(tdarr, name=name)

     @classmethod
-    def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
-        # `dtype` is passed by _shallow_copy in corner cases, should always
-        #  be timedelta64[ns] if present
-        assert dtype == _TD_DTYPE, dtype
+    def _simple_new(cls, values: TimedeltaArray, name: Label = None):
         assert isinstance(values, TimedeltaArray)
-        assert freq is None or values.freq == freq

         result = object.__new__(cls)
         result._data = values
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index b8a70752330c5..e505917da1dc4 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -65,8 +65,8 @@ def test_compare_len1_raises(self):
         # to the case where one has length-1, which numpy would broadcast
         data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9

-        idx = self.array_cls._simple_new(data, freq="D")
-        arr = self.index_cls(idx)
+        arr = self.array_cls._simple_new(data, freq="D")
+        idx = self.index_cls(arr)

         with pytest.raises(ValueError, match="Lengths must match"):
             arr == arr[:1]
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 8ed98410ad9a4..a533d06a924e6 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -363,7 +363,7 @@ def test_equals(self):
             assert not idx.equals(pd.Series(idx2))

             # same internal, different tz
-            idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific")
+            idx3 = pd.DatetimeIndex(idx.asi8, tz="US/Pacific")
             tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
             assert not idx.equals(idx3)
             assert not idx.equals(idx3.copy())
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index b6bbe008812cb..bc20d784c8dee 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -337,9 +337,6 @@ def apply_index(self, i):
         # integer addition on PeriodIndex is deprecated,
         # so we directly use _time_shift instead
         asper = i.to_period("W")
-        if not isinstance(asper._data, np.ndarray):
-            # unwrap PeriodIndex --> PeriodArray
-            asper = asper._data

         shifted = asper._time_shift(weeks)
         i = shifted.to_timestamp() + i.to_perioddelta("W")
@@ -629,9 +626,6 @@ def apply_index(self, i):
         # to_period rolls forward to next BDay; track and
         # reduce n where it does when rolling forward
         asper = i.to_period("B")
-        if not isinstance(asper._data, np.ndarray):
-            # unwrap PeriodIndex --> PeriodArray
-            asper = asper._data

         if self.n > 0:
             shifted = (i.to_perioddelta("B") - time).asi8 != 0
@@ -1384,9 +1378,6 @@ def apply_index(self, i):
         # integer-array addition on PeriodIndex is deprecated,
         # so we use _addsub_int_array directly
         asper = i.to_period("M")
-        if not isinstance(asper._data, np.ndarray):
-            # unwrap PeriodIndex --> PeriodArray
-            asper = asper._data

         shifted = asper._addsub_int_array(roll // 2, operator.add)
         i = type(dti)(shifted.to_timestamp())
@@ -1582,9 +1573,6 @@ def apply_index(self, i):
         # integer addition on PeriodIndex is deprecated,
         # so we use _time_shift directly
         asper = i.to_period("W")
-        if not isinstance(asper._data, np.ndarray):
-            # unwrap PeriodIndex --> PeriodArray
-            asper = asper._data

         shifted = asper._time_shift(self.n)
         return shifted.to_timestamp() + i.to_perioddelta("W")
@@ -1608,9 +1596,6 @@ def _end_apply_index(self, dtindex):
         base, mult = libfrequencies.get_freq_code(self.freqstr)
         base_period = dtindex.to_period(base)
-        if not isinstance(base_period._data, np.ndarray):
-            # unwrap PeriodIndex --> PeriodArray
-            base_period = base_period._data

         if self.n > 0:
             # when adding, dates on end roll to next
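
The `apply_index_wraps` change in `offsets.pyx` centralizes an unwrap/re-wrap step: the wrapper detects an index-like argument, passes the underlying array to the wrapped function, and rebuilds the index from the array result via `_simple_new`. Below is a minimal, self-contained sketch of that pattern (not pandas code); `ToyArray`, `ToyIndex`, and `ToyOffset` are hypothetical stand-ins for `DatetimeArray`, `DatetimeIndex`, and a date offset.

```python
# Toy sketch of the unwrap/re-wrap pattern used by apply_index_wraps.
# ToyArray / ToyIndex are hypothetical stand-ins, not pandas classes.


class ToyArray:
    def __init__(self, data):
        self.data = list(data)

    def shift(self, n):
        # stand-in for the array-level operation an offset performs
        return ToyArray(x + n for x in self.data)


class ToyIndex:
    _typ = "datetimeindex"

    def __init__(self, data, name=None):
        self._data = data if isinstance(data, ToyArray) else ToyArray(data)
        self.name = name

    @classmethod
    def _simple_new(cls, values, name=None):
        # mirrors the narrowed _simple_new contract: values must already
        # be the array type, no coercion happens at the index level
        assert isinstance(values, ToyArray), type(values)
        result = object.__new__(cls)
        result._data = values
        result.name = name
        return result


def apply_index_wraps(func):
    def wrapper(self, other):
        is_index = getattr(other, "_typ", "") == "datetimeindex"
        arr = other._data if is_index else other  # operate on the array
        result = func(self, arr)
        if is_index:
            # wrap the array result back into the index type, keeping the name
            result = type(other)._simple_new(result, name=other.name)
        return result

    return wrapper


class ToyOffset:
    def __init__(self, n):
        self.n = n

    @apply_index_wraps
    def apply_index(self, arr):
        return arr.shift(self.n)


idx = ToyIndex([1, 2, 3], name="ts")
shifted = ToyOffset(10).apply_index(idx)
print(type(shifted).__name__, shifted.name, shifted._data.data)  # ToyIndex ts [11, 12, 13]
```

With the wrapper owning the unwrap/re-wrap, the `apply_index` implementations in `pandas/tseries/offsets.py` no longer need the repeated "unwrap PeriodIndex --> PeriodArray" blocks removed above.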
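For the index side, the diff narrows `DatetimeIndex._simple_new` (and the `TimedeltaIndex` counterpart) to accept only an already-built array plus a name; `freq`, `tz`, and `dtype` travel with the array instead of being re-validated by the index. A hedged illustration of that contract, using internal attributes (`._data`, `_simple_new`) that are not public API and reflect this change only:

```python
import pandas as pd

# Build the DatetimeArray first (here by reusing an existing index's array),
# then hand it to the index-level _simple_new together with a name.
dti = pd.date_range("2020-01-01", periods=3, freq="D", tz="US/Pacific")
dta = dti._data  # the underlying DatetimeArray, carrying freq and tz

new_index = pd.DatetimeIndex._simple_new(dta, name="ts")
print(new_index.freq, new_index.tz, new_index.name)
```

Callers that previously relied on `_simple_new` to coerce raw i8 data now go through the public constructor instead, as the `test_ops.py` change shows with `pd.DatetimeIndex(idx.asi8, tz="US/Pacific")`.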