From a075ab552c81d3560a77de380c80ea2252a5fbe4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Dec 2021 12:14:56 -0800 Subject: [PATCH 1/2] REGR: DataFrame.shift with periods>len(columns) GH#44978 --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/internals/managers.py | 5 +++-- pandas/tests/frame/methods/test_shift.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2592be9c4a350..7e2d9985e6a5b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -871,6 +871,7 @@ Other - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`) - Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`) - Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``periods`` larger than ``len(frame.columns)`` producing an invalid :class:`DataFrame` (:issue:`44978`) - Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`) - Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`) - Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5ebc0292f24b4..3e0b62da64f42 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -388,12 +388,13 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: # GH#35488 we need to watch out for multi-block cases # We only get here with fill_value not-lib.no_default ncols = self.shape[0] + nper = abs(periods) + nper = min(nper, ncols) if periods > 0: indexer = np.array( - [-1] * periods + list(range(ncols - periods)), dtype=np.intp + [-1] * nper + list(range(ncols - periods)), dtype=np.intp ) else: - nper = abs(periods) indexer = np.array( list(range(nper, ncols)) + [-1] * nper, dtype=np.intp ) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index c92e3dbe27439..6ea6959ede971 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -664,3 +664,14 @@ def test_shift_axis1_categorical_columns(self): columns=ci, ) tm.assert_frame_equal(result, expected) + + def test_shift_axis1_many_periods(self): + # GH#44978 periods > len(columns) + df = DataFrame(np.random.rand(5, 3)) + shifted = df.shift(6, axis=1, fill_value=None) + + expected = df * np.nan + tm.assert_frame_equal(shifted, expected) + + shifted2 = df.shift(-6, axis=1, fill_value=None) + tm.assert_frame_equal(shifted2, expected) From fadb5160c020c21d9e4a2bdceaf0e1fbd0c7145f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Dec 2021 09:21:21 -0800 Subject: [PATCH 2/2] xfail for ArrayManager --- pandas/tests/frame/methods/test_shift.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 6ea6959ede971..2463e81d78edd 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -665,6 +665,7 @@ def test_shift_axis1_categorical_columns(self): ) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented def test_shift_axis1_many_periods(self): # GH#44978 periods > len(columns) df = DataFrame(np.random.rand(5, 3))