diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 597a0c5386cf0..823bfc75e4304 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -544,6 +544,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`)
 - Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`)
 - Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`)
+- Bug in :class:`DatetimeIndex` where setting the ``freq`` attribute on an index could silently change the ``freq`` attribute on another index viewing the same data (:issue:`33552`)
 
 Timedelta
 ^^^^^^^^^
@@ -570,6 +571,7 @@ Numeric
 - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
 - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
 - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
+- Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py
index b8b234d937292..371425f325d76 100644
--- a/pandas/core/array_algos/transforms.py
+++ b/pandas/core/array_algos/transforms.py
@@ -10,9 +10,8 @@
 def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray:
     new_values = values
 
-    if periods == 0:
-        # TODO: should we copy here?
-        return new_values
+    if periods == 0 or values.size == 0:
+        return new_values.copy()
 
     # make sure array sent to np.roll is c_contiguous
     f_ordered = values.flags.f_contiguous
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index a091476640e07..de401368d55d7 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1196,7 +1196,7 @@ def shift(self, periods, fill_value=None):
 
         fill_value = self._validate_fill_value(fill_value)
 
-        codes = shift(codes.copy(), periods, axis=0, fill_value=fill_value)
+        codes = shift(codes, periods, axis=0, fill_value=fill_value)
 
         return self._constructor(codes, dtype=self.dtype, fastpath=True)
 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 9d3ec284a2569..af7beb0b32be0 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -699,8 +699,6 @@ def _values_for_argsort(self):
 
     @Appender(ExtensionArray.shift.__doc__)
     def shift(self, periods=1, fill_value=None, axis=0):
-        if not self.size or periods == 0:
-            return self.copy()
 
         fill_value = self._validate_shift_value(fill_value)
         new_values = shift(self._data, periods, axis, fill_value)
@@ -745,7 +743,9 @@ def _validate_shift_value(self, fill_value):
         # TODO(2.0): once this deprecation is enforced, used _validate_fill_value
         if is_valid_nat_for_dtype(fill_value, self.dtype):
             fill_value = NaT
-        elif not isinstance(fill_value, self._recognized_scalars):
+        elif isinstance(fill_value, self._recognized_scalars):
+            fill_value = self._scalar_type(fill_value)
+        else:
             # only warn if we're not going to raise
             if self._scalar_type is Period and lib.is_integer(fill_value):
                 # kludge for #31971 since Period(integer) tries to cast to str
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index c40fad6b434d0..afca4ca86bd3f 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -2237,7 +2237,12 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
             # Cannot currently calculate diff across multiple blocks since this
             # function is invoked via apply
             raise NotImplementedError
-        new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8
+
+        if n == 0:
+            # Fastpath avoids making a copy in `shift`
+            new_values = np.zeros(self.values.shape, dtype=np.int64)
+        else:
+            new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8
 
         # Reshape the new_values like how algos.diff does for timedelta data
         new_values = new_values.reshape(1, len(new_values))
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 7d80ad3d8c6be..804654451a6d9 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -374,6 +374,40 @@ def test_searchsorted_invalid_types(self, other, index):
         with pytest.raises(TypeError, match=msg):
             arr.searchsorted(other)
 
+    def test_shift_fill_value(self):
+        dti = pd.date_range("2016-01-01", periods=3)
+
+        dta = dti._data
+        expected = DatetimeArray(np.roll(dta._data, 1))
+
+        fv = dta[-1]
+        for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
+            result = dta.shift(1, fill_value=fill_value)
+            tm.assert_datetime_array_equal(result, expected)
+
+        dta = dta.tz_localize("UTC")
+        expected = expected.tz_localize("UTC")
+        fv = dta[-1]
+        for fill_value in [fv, fv.to_pydatetime()]:
+            result = dta.shift(1, fill_value=fill_value)
+            tm.assert_datetime_array_equal(result, expected)
+
+    def test_shift_value_tzawareness_mismatch(self):
+        dti = pd.date_range("2016-01-01", periods=3)
+
+        dta = dti._data
+
+        fv = dta[-1].tz_localize("UTC")
+        for invalid in [fv, fv.to_pydatetime()]:
+            with pytest.raises(TypeError, match="Cannot compare"):
+                dta.shift(1, fill_value=invalid)
+
+        dta = dta.tz_localize("UTC")
+        fv = dta[-1].tz_localize(None)
+        for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
+            with pytest.raises(TypeError, match="Cannot compare"):
+                dta.shift(1, fill_value=invalid)
+
 
 class TestSequenceToDT64NS:
     def test_tz_dtype_mismatch_raises(self):
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 9c465e264d8a1..6af74b9a022b7 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -237,6 +237,13 @@ def test_container_shift(self, data, frame, periods, indices):
 
         compare(result, expected)
 
+    def test_shift_0_periods(self, data):
+        # GH#33856 shifting with periods=0 should return a copy, not same obj
+        result = data.shift(0)
+        assert data[0] != data[1]  # otherwise below is invalid
+        data[0] = data[1]
+        assert result[0] != result[1]  # i.e. not the same object/view
+
     @pytest.mark.parametrize("periods", [1, -2])
     def test_diff(self, data, periods):
         data = data[:5]
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index 694bbee59606f..19ac25eb0ccf7 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -309,6 +309,13 @@ def test_searchsorted(self, data_for_sorting, as_series):
         with tm.assert_produces_warning(PerformanceWarning):
             super().test_searchsorted(data_for_sorting, as_series)
 
+    def test_shift_0_periods(self, data):
+        # GH#33856 shifting with periods=0 should return a copy, not same obj
+        result = data.shift(0)
+
+        data._sparse_values[0] = data._sparse_values[1]
+        assert result._sparse_values[0] != result._sparse_values[1]
+
 
 class TestCasting(BaseSparseTests, base.BaseCastingTests):
     def test_astype_object_series(self, all_data):