From 73c542da3eff78f4dbb8f852ef2493539601c24f Mon Sep 17 00:00:00 2001 From: "wenjun.swj" Date: Fri, 9 Jul 2021 13:22:55 +0800 Subject: [PATCH 1/3] BUG: Sampling over selected groupbys does not reflect the selection --- doc/source/whatsnew/v1.3.2.rst | 2 +- pandas/core/internals/managers.py | 6 ++++-- pandas/tests/frame/methods/test_shift.py | 26 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index b136d6207681b..17eb4e404f46e 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a888649d3ed98..2d2f589aa10fe 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -386,10 +386,12 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: # We only get here with fill_value not-lib.no_default ncols = self.shape[0] if periods > 0: - indexer = [-1] * periods + list(range(ncols - periods)) + indexer = np.concatenate( + [np.repeat(-1, periods), np.arange(ncols - periods)] + ) else: nper = abs(periods) - indexer = list(range(nper, ncols)) + [-1] * nper + indexer = np.concatenate([np.arange(nper, ncols), np.repeat(-1, nper)]) result = self.reindex_indexer( self.items, indexer, diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 0474206aec06f..9df5f79aa7d19 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -183,6 +183,32 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager): tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support + def test_shift_axis1_multiple_blocks_with_int_fill(self): + # GH#42719 + df1 = DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([-1, -1, 0, 1], axis=1) + expected.iloc[:, :2] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) + result = df3.shift(-2, axis=1, fill_value=np.int_(0)) + assert len(df3._mgr.blocks) == 2 + + expected = df3.take([2, 3, -1, -1], axis=1) + expected.iloc[:, -2:] = np.int_(0) + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_tshift(self, datetime_frame): # TODO: remove this test when tshift deprecation is enforced From ec8a759c0ccf4f75061937f2451badd9e7ba442f Mon Sep 17 00:00:00 2001 From: wjsi Date: Tue, 27 Jul 2021 00:20:36 +0800 Subject: [PATCH 2/3] Simplify --- pandas/core/internals/managers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2d2f589aa10fe..01edfb4066991 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -386,12 +386,14 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: # We only get here with fill_value not-lib.no_default ncols = self.shape[0] if periods > 0: - indexer = np.concatenate( - [np.repeat(-1, periods), np.arange(ncols - periods)] + indexer = np.array( + [-1] * periods + list(range(ncols - periods)), dtype=np.intp ) else: nper = abs(periods) - indexer = np.concatenate([np.arange(nper, ncols), np.repeat(-1, nper)]) + indexer = np.array( + list(range(nper, ncols)) + [-1] * nper, dtype=np.intp + ) result = self.reindex_indexer( self.items, indexer, From 2cb544175939a49413d84f1aefadeb984a9ecd19 Mon Sep 17 00:00:00 2001 From: "wenjun.swj" Date: Tue, 27 Jul 2021 16:19:02 +0800 Subject: [PATCH 3/3] Fix what's new section --- doc/source/whatsnew/v1.3.2.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 91b16d9e9d64b..f4804215db8c1 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -17,6 +17,7 @@ Fixed regressions - Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`) - Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`) - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) +- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - .. --------------------------------------------------------------------------- @@ -25,7 +26,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Fixed bug in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) +- - .. ---------------------------------------------------------------------------