From 87b40fbcc564be082a3ba869f1ab5e0ae694a1ca Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Jan 2022 12:11:28 -0800 Subject: [PATCH 1/2] BUG: frame.shift(axis=1) with ArrayManager --- pandas/core/frame.py | 43 ++++++++++++++++++++++ pandas/core/internals/managers.py | 45 ------------------------ pandas/tests/frame/methods/test_shift.py | 9 +---- 3 files changed, 44 insertions(+), 53 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 693ae2f3203fd..5bb52a62c3fcb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -93,6 +93,7 @@ ) from pandas.core.dtypes.cast import ( + can_hold_element, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, find_common_type, @@ -5366,6 +5367,48 @@ def shift( result.columns = self.columns.copy() return result + elif ( + axis == 1 + and periods != 0 + and fill_value is not lib.no_default + and ncols > 0 + ): + arrays = self._mgr.arrays + if len(arrays) > 1 or ( + # If we only have one block and we know that we can't + # keep the same dtype (i.e. the _can_hold_element check) + # then we can go through the reindex_indexer path + # (and avoid casting logic in the Block method). + # The exception to this (until 2.0) is datetimelike + # dtypes with integers, which cast. + not can_hold_element(arrays[0], fill_value) + # TODO(2.0): remove special case for integer-with-datetimelike + # once deprecation is enforced + and not ( + lib.is_integer(fill_value) and needs_i8_conversion(arrays[0].dtype) + ) + ): + # GH#35488 we need to watch out for multi-block cases + # We only get here with fill_value not-lib.no_default + nper = abs(periods) + nper = min(nper, ncols) + if periods > 0: + indexer = np.array( + [-1] * nper + list(range(ncols - periods)), dtype=np.intp + ) + else: + indexer = np.array( + list(range(nper, ncols)) + [-1] * nper, dtype=np.intp + ) + mgr = self._mgr.reindex_indexer( + self.columns, + indexer, + axis=0, + fill_value=fill_value, + allow_dups=True, + ) + res_df = self._constructor(mgr) + return res_df.__finalize__(self, method="shift") return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8599a1281a976..af77e1057e310 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -36,7 +36,6 @@ is_1d_only_ea_dtype, is_dtype_equal, is_list_like, - needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( @@ -366,50 +365,6 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None - if ( - axis == 0 - and self.ndim == 2 - and ( - self.nblocks > 1 - or ( - # If we only have one block and we know that we can't - # keep the same dtype (i.e. the _can_hold_element check) - # then we can go through the reindex_indexer path - # (and avoid casting logic in the Block method). - # The exception to this (until 2.0) is datetimelike - # dtypes with integers, which cast. - not self.blocks[0]._can_hold_element(fill_value) - # TODO(2.0): remove special case for integer-with-datetimelike - # once deprecation is enforced - and not ( - lib.is_integer(fill_value) - and needs_i8_conversion(self.blocks[0].dtype) - ) - ) - ) - ): - # GH#35488 we need to watch out for multi-block cases - # We only get here with fill_value not-lib.no_default - ncols = self.shape[0] - nper = abs(periods) - nper = min(nper, ncols) - if periods > 0: - indexer = np.array( - [-1] * nper + list(range(ncols - periods)), dtype=np.intp - ) - else: - indexer = np.array( - list(range(nper, ncols)) + [-1] * nper, dtype=np.intp - ) - result = self.reindex_indexer( - self.items, - indexer, - axis=0, - fill_value=fill_value, - allow_dups=True, - ) - return result - return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 2463e81d78edd..c053289bbe11a 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -611,14 +611,8 @@ def test_shift_dt64values_int_fill_deprecated(self): ) # TODO(2.0): remove filtering @pytest.mark.filterwarnings("ignore:Index.ravel.*:FutureWarning") - def test_shift_dt64values_axis1_invalid_fill( - self, vals, as_cat, using_array_manager, request - ): + def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat, request): # GH#44564 - if using_array_manager: - mark = pytest.mark.xfail(raises=NotImplementedError) - request.node.add_marker(mark) - ser = Series(vals) if as_cat: ser = ser.astype("category") @@ -665,7 +659,6 @@ def test_shift_axis1_categorical_columns(self): ) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented def test_shift_axis1_many_periods(self): # GH#44978 periods > len(columns) df = DataFrame(np.random.rand(5, 3)) From a25cb6d4bc3512fdefb48d97ec81d3fb0139999f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 26 Jan 2022 13:25:29 -0800 Subject: [PATCH 2/2] more fixed xfails --- pandas/tests/apply/test_str.py | 11 +---------- pandas/tests/groupby/transform/test_transform.py | 8 ++------ 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 82997328529cd..31bd3fbfa60d3 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -256,19 +256,10 @@ def test_transform_groupby_kernel_series(string_series, op): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel_frame( - axis, float_frame, op, using_array_manager, request -): +def test_transform_groupby_kernel_frame(axis, float_frame, op, request): # TODO(2.0) Remove after pad/backfill deprecation enforced op = maybe_normalize_deprecated_kernels(op) # GH 35964 - if using_array_manager and op == "pct_change" and axis in (1, "columns"): - # TODO(ArrayManager) shift with axis=1 - request.node.add_marker( - pytest.mark.xfail( - reason="shift axis=1 not yet implemented for ArrayManager" - ) - ) args = [0.0] if op == "fillna" else [] if axis == 0 or axis == "index": diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 12a25a1e61211..6ef33513294a1 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -165,13 +165,9 @@ def test_transform_broadcast(tsframe, ts): assert_fp_equal(res.xs(idx), agged[idx]) -def test_transform_axis_1(request, transformation_func, using_array_manager): +def test_transform_axis_1(request, transformation_func): # GH 36308 - if using_array_manager and transformation_func == "pct_change": - # TODO(ArrayManager) column-wise shift - request.node.add_marker( - pytest.mark.xfail(reason="ArrayManager: shift axis=1 not yet implemented") - ) + # TODO(2.0) Remove after pad/backfill deprecation enforced transformation_func = maybe_normalize_deprecated_kernels(transformation_func)