diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7b9efd7f593dd..ede720bbbec40 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -535,11 +535,12 @@ Other - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) +- Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`) - Bug in :meth:`Series.align`, :meth:`DataFrame.align`, :meth:`Series.reindex`, :meth:`DataFrame.reindex`, :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, incorrectly failing to raise with method="asfreq" (:issue:`53620`) - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`) - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`) -- .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4088736dd4150..1630c541c5701 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5483,45 +5483,41 @@ def shift( ) -> DataFrame: axis = self._get_axis_number(axis) + if freq is not None and fill_value is not lib.no_default: + # GH#53832 + raise ValueError( + "Cannot pass both 'freq' and 'fill_value' to " + f"{type(self).__name__}.shift" + ) + ncols = len(self.columns) - if ( - axis == 1 - and periods != 0 - and freq is None - and fill_value is lib.no_default - and ncols > 0 - ): - # We will infer fill_value to match the closest column - - # Use a column that we know is valid for our column's dtype GH#38434 - label = self.columns[0] - - if periods > 0: - result = self.iloc[:, :-periods] - for col in range(min(ncols, abs(periods))): - # TODO(EA2D): doing this in a loop unnecessary with 2D EAs - # Define filler inside loop so we get a copy - filler = self.iloc[:, 0].shift(len(self)) - result.insert(0, label, filler, allow_duplicates=True) - else: - result = self.iloc[:, -periods:] - for col in range(min(ncols, abs(periods))): - # Define filler inside loop so we get a copy - filler = self.iloc[:, -1].shift(len(self)) - result.insert( - len(result.columns), label, filler, allow_duplicates=True - ) + arrays = self._mgr.arrays + if axis == 1 and periods != 0 and ncols > 0 and freq is None: + if fill_value is lib.no_default: + # We will infer fill_value to match the closest column - result.columns = self.columns.copy() - return result - elif ( - axis == 1 - and periods != 0 - and fill_value is not lib.no_default - and ncols > 0 - ): - arrays = self._mgr.arrays - if len(arrays) > 1 or ( + # Use a column that we know is valid for our column's dtype GH#38434 + label = self.columns[0] + + if periods > 0: + result = self.iloc[:, :-periods] + for col in range(min(ncols, abs(periods))): + # TODO(EA2D): doing this in a loop unnecessary with 2D EAs + # Define filler inside loop so we get a copy + filler = self.iloc[:, 0].shift(len(self)) + result.insert(0, label, filler, allow_duplicates=True) + else: + result = self.iloc[:, -periods:] + for col in range(min(ncols, abs(periods))): + # Define filler inside loop so we get a copy + filler = self.iloc[:, -1].shift(len(self)) + result.insert( + len(result.columns), label, filler, allow_duplicates=True + ) + + result.columns = self.columns.copy() + return result + elif len(arrays) > 1 or ( # If we only have one block and we know that we can't # keep the same dtype (i.e. the _can_hold_element check) # then we can go through the reindex_indexer path @@ -5549,6 +5545,8 @@ def shift( ) res_df = self._constructor(mgr) return res_df.__finalize__(self, method="shift") + else: + return self.T.shift(periods=periods, fill_value=fill_value).T return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0be840f9a4ef1..4d50d4519859c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10434,7 +10434,7 @@ def shift( periods: int = 1, freq=None, axis: Axis = 0, - fill_value: Hashable = None, + fill_value: Hashable = lib.no_default, ) -> Self: """ Shift index by desired number of periods with an optional time `freq`. @@ -10532,6 +10532,15 @@ def shift( 2020-01-07 30 33 37 2020-01-08 45 48 52 """ + axis = self._get_axis_number(axis) + + if freq is not None and fill_value is not lib.no_default: + # GH#53832 + raise ValueError( + "Cannot pass both 'freq' and 'fill_value' to " + f"{type(self).__name__}.shift" + ) + if periods == 0: return self.copy(deep=None) @@ -10543,6 +10552,11 @@ def shift( ) return self._constructor(new_data).__finalize__(self, method="shift") + return self._shift_with_freq(periods, axis, freq) + + @final + def _shift_with_freq(self, periods: int, axis: int, freq) -> Self: + # see shift.__doc__ # when freq is given, index is shifted, data is not index = self._get_axis(axis) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 529be6850b3ba..ebbb7ca13646f 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -17,6 +17,35 @@ class TestDataFrameShift: + def test_shift_axis1_with_valid_fill_value_one_array(self): + # Case with axis=1 that does not go through the "len(arrays)>1" path + # in DataFrame.shift + data = np.random.randn(5, 3) + df = DataFrame(data) + res = df.shift(axis=1, periods=1, fill_value=12345) + expected = df.T.shift(periods=1, fill_value=12345).T + tm.assert_frame_equal(res, expected) + + # same but with an 1D ExtensionArray backing it + df2 = df[[0]].astype("Float64") + res2 = df2.shift(axis=1, periods=1, fill_value=12345) + expected2 = DataFrame([12345] * 5, dtype="Float64") + tm.assert_frame_equal(res2, expected2) + + def test_shift_disallow_freq_and_fill_value(self, frame_or_series): + # Can't pass both! + obj = frame_or_series( + np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + ) + + msg = "Cannot pass both 'freq' and 'fill_value' to (Series|DataFrame).shift" + with pytest.raises(ValueError, match=msg): + obj.shift(1, fill_value=1, freq="H") + + if frame_or_series is DataFrame: + with pytest.raises(ValueError, match=msg): + obj.shift(1, axis=1, fill_value=1, freq="H") + @pytest.mark.parametrize( "input_data, output_data", [(np.empty(shape=(0,)), []), (np.ones(shape=(2,)), [np.nan, 1.0])],