Skip to content

REF: move logic of 'block manager axis' into the BlockManager #40075

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 5, 2021
7 changes: 3 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7843,12 +7843,11 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
raise ValueError("periods must be an integer")
periods = int(periods)

bm_axis = self._get_block_manager_axis(axis)

if bm_axis == 0 and periods != 0:
axis = self._get_axis_number(axis)
if axis == 1 and periods != 0:
return self - self.shift(periods, axis=axis)

new_data = self._mgr.diff(n=periods, axis=bm_axis)
new_data = self._mgr.diff(n=periods, axis=axis)
return self._constructor(new_data).__finalize__(self, "diff")

# ----------------------------------------------------------------------
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8952,8 +8952,6 @@ def _where(
self._info_axis, axis=self._info_axis_number, copy=False
)

block_axis = self._get_block_manager_axis(axis)

if inplace:
# we may have different type blocks come out of putmask, so
# reconstruct the block manager
Expand All @@ -8969,7 +8967,7 @@ def _where(
cond=cond,
align=align,
errors=errors,
axis=block_axis,
axis=axis,
)
result = self._constructor(new_data)
return result.__finalize__(self)
Expand Down Expand Up @@ -9280,9 +9278,9 @@ def shift(

if freq is None:
# when freq is None, data is shifted, index is not
block_axis = self._get_block_manager_axis(axis)
axis = self._get_axis_number(axis)
new_data = self._mgr.shift(
periods=periods, axis=block_axis, fill_value=fill_value
periods=periods, axis=axis, fill_value=fill_value
)
return self._constructor(new_data).__finalize__(self, method="shift")

Expand Down
17 changes: 7 additions & 10 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,7 @@ def apply(

return type(self)(result_arrays, new_axes)

def apply_2d(
self: T,
f,
ignore_failures: bool = False,
**kwargs,
) -> T:
def apply_2d(self: T, f, ignore_failures: bool = False, **kwargs) -> T:
"""
Variant of `apply`, but where the function should not be applied to
each column independently, but to the full data as a 2D array.
Expand All @@ -430,7 +425,10 @@ def apply_2d(

return type(self)(result_arrays, new_axes)

def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
# switch axis to follow BlockManager logic
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still needed now that you have `_normalize_axis`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is still needed here, because this is the function that implements the "use the block method as a fallback" path, and the block method still uses its inverted axis logic.

(However, I am actively removing the usages of `apply_with_block`; see, for example, #40196.)

if swap_axis and "axis" in kwargs and self.ndim == 2:
kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

align_keys = align_keys or []
aligned_args = {k: kwargs[k] for k in align_keys}
Expand Down Expand Up @@ -542,7 +540,6 @@ def putmask(self, mask, new, align: bool = True):
)

def diff(self, n: int, axis: int) -> ArrayManager:
axis = self._normalize_axis(axis)
if axis == 1:
# DataFrame only calls this for n=0, in which case performing it
# with axis=0 is equivalent
Expand All @@ -551,13 +548,13 @@ def diff(self, n: int, axis: int) -> ArrayManager:
return self.apply(algos.diff, n=n, axis=axis)

def interpolate(self, **kwargs) -> ArrayManager:
return self.apply_with_block("interpolate", **kwargs)
return self.apply_with_block("interpolate", swap_axis=False, **kwargs)

def shift(self, periods: int, axis: int, fill_value) -> ArrayManager:
if fill_value is lib.no_default:
fill_value = None

if axis == 0 and self.ndim == 2:
if axis == 1 and self.ndim == 2:
# TODO column-wise shift
raise NotImplementedError

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,12 @@ def shape(self) -> Shape:
def ndim(self) -> int:
return len(self.axes)

def _normalize_axis(self, axis):
# Translate the axis number passed in by the caller into the axis
# numbering used by the manager ("BlockManager logic").
#
# For a 2-dimensional manager the axis is flipped: 0 becomes 1 and any
# other value becomes 0. For any other ndim the axis is returned unchanged.
# NOTE(review): non-0/1 inputs (e.g. negative axes) are not validated here;
# presumably callers resolve the axis with _get_axis_number first -- confirm.
if self.ndim == 2:
axis = 1 if axis == 0 else 0
return axis

def set_axis(
self, axis: int, new_labels: Index, verify_integrity: bool = True
) -> None:
Expand Down Expand Up @@ -560,6 +566,7 @@ def isna(self, func) -> BlockManager:
return self.apply("apply", func=func)

def where(self, other, cond, align: bool, errors: str, axis: int) -> BlockManager:
axis = self._normalize_axis(axis)
if align:
align_keys = ["other", "cond"]
else:
Expand Down Expand Up @@ -594,12 +601,14 @@ def putmask(self, mask, new, align: bool = True):
)

def diff(self, n: int, axis: int) -> BlockManager:
# Compute a difference of `n` periods along `axis`.
# The incoming axis is first converted with _normalize_axis (flipped for a
# 2-dimensional manager) before being handed on.
axis = self._normalize_axis(axis)
# Delegate to self.apply with the name "diff"; presumably this dispatches to
# the block-level method of that name, which expects the normalized axis.
return self.apply("diff", n=n, axis=axis)

def interpolate(self, **kwargs) -> BlockManager:
# Forward all keyword arguments unchanged to self.apply under the name
# "interpolate".
# NOTE(review): unlike diff, no _normalize_axis call is made here, so any
# `axis` in kwargs is passed through as given -- presumably callers already
# supply it in the manager's own axis numbering; confirm.
return self.apply("interpolate", **kwargs)

def shift(self, periods: int, axis: int, fill_value) -> BlockManager:
axis = self._normalize_axis(axis)
if fill_value is lib.no_default:
fill_value = None

Expand Down