Skip to content

Backport PR #42317 on branch 1.3.x (Revert "REF: move shift logic from BlockManager to DataFrame") #42321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 17 additions & 34 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5285,45 +5285,28 @@ def shift(
axis = self._get_axis_number(axis)

ncols = len(self.columns)
if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0:
# We will infer fill_value to match the closest column

if (
axis == 1
and periods != 0
and ncols > 0
and (fill_value is lib.no_default or len(self._mgr.arrays) > 1)
):
# Exclude single-array-with-fill_value case so we issue a FutureWarning
# if an integer is passed with datetimelike dtype GH#31971
from pandas import concat
# Use a column that we know is valid for our column's dtype GH#38434
label = self.columns[0]

# tail: the data that is still in our shifted DataFrame
if periods > 0:
tail = self.iloc[:, :-periods]
else:
tail = self.iloc[:, -periods:]
# pin a simple Index to avoid costly casting
tail.columns = range(len(tail.columns))

if fill_value is not lib.no_default:
# GH#35488
# TODO(EA2D): with 2D EAs we could construct other directly
ser = Series(fill_value, index=self.index)
result = self.iloc[:, :-periods]
for col in range(min(ncols, abs(periods))):
# TODO(EA2D): doing this in a loop is unnecessary with 2D EAs
# Define filler inside loop so we get a copy
filler = self.iloc[:, 0].shift(len(self))
result.insert(0, label, filler, allow_duplicates=True)
else:
# We infer fill_value to match the closest column
if periods > 0:
ser = self.iloc[:, 0].shift(len(self))
else:
ser = self.iloc[:, -1].shift(len(self))

width = min(abs(periods), ncols)
other = concat([ser] * width, axis=1)

if periods > 0:
result = concat([other, tail], axis=1)
else:
result = concat([tail, other], axis=1)
result = self.iloc[:, -periods:]
for col in range(min(ncols, abs(periods))):
# Define filler inside loop so we get a copy
filler = self.iloc[:, -1].shift(len(self))
result.insert(
len(result.columns), label, filler, allow_duplicates=True
)

result = cast(DataFrame, result)
result.columns = self.columns.copy()
return result

Expand Down
19 changes: 19 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,25 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T:
if fill_value is lib.no_default:
fill_value = None

if axis == 0 and self.ndim == 2 and self.nblocks > 1:
# GH#35488 we need to watch out for multi-block cases
# We only get here when fill_value is not lib.no_default
ncols = self.shape[0]
if periods > 0:
indexer = [-1] * periods + list(range(ncols - periods))
else:
nper = abs(periods)
indexer = list(range(nper, ncols)) + [-1] * nper
result = self.reindex_indexer(
self.items,
indexer,
axis=0,
fill_value=fill_value,
allow_dups=True,
consolidate=False,
)
return result

return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,15 @@ def test_transform_ufunc(axis, float_frame, frame_or_series):


@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel(axis, float_frame, op, request):
def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request):
# GH 35964
if using_array_manager and op == "pct_change" and axis in (1, "columns"):
# TODO(ArrayManager) shift with axis=1
request.node.add_marker(
pytest.mark.xfail(
reason="shift axis=1 not yet implemented for ArrayManager"
)
)

args = [0.0] if op == "fillna" else []
if axis == 0 or axis == "index":
Expand Down