diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 6e6b521ae7d74..a9bf24784d405 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -6,7 +6,6 @@ import itertools from typing import ( TYPE_CHECKING, - Any, Callable, Hashable, Literal, @@ -16,10 +15,8 @@ from pandas._libs import ( NaT, - algos as libalgos, lib, ) -from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import ( astype_array, @@ -261,10 +258,11 @@ def apply( # expected "List[Union[ndarray, ExtensionArray]]" return type(self)(result_arrays, new_axes) # type: ignore[arg-type] - def apply_with_block( - self, f, align_keys=None, swap_axis: bool = True, **kwargs - ) -> Self: + def apply_with_block(self, f, align_keys=None, **kwargs) -> Self: # switch axis to follow BlockManager logic + swap_axis = True + if f == "interpolate": + swap_axis = False if swap_axis and "axis" in kwargs and self.ndim == 2: kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0 @@ -319,50 +317,13 @@ def apply_with_block( return type(self)(result_arrays, self._axes) - def where(self, other, cond, align: bool) -> Self: - if align: - align_keys = ["other", "cond"] - else: - align_keys = ["cond"] - other = extract_array(other, extract_numpy=True) - - return self.apply_with_block( - "where", - align_keys=align_keys, - other=other, - cond=cond, - ) - - def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply_with_block("round", decimals=decimals, using_cow=using_cow) - def setitem(self, indexer, value) -> Self: return self.apply_with_block("setitem", indexer=indexer, value=value) - def putmask(self, mask, new, align: bool = True) -> Self: - if align: - align_keys = ["new", "mask"] - else: - align_keys = ["mask"] - new = extract_array(new, extract_numpy=True) - - return self.apply_with_block( - "putmask", - align_keys=align_keys, - mask=mask, - new=new, - ) - def diff(self, n: int) -> Self: assert self.ndim == 2 # caller ensures return self.apply(algos.diff, n=n) - def pad_or_backfill(self, **kwargs) -> Self: - return self.apply_with_block("pad_or_backfill", swap_axis=False, **kwargs) - - def interpolate(self, **kwargs) -> Self: - return self.apply_with_block("interpolate", swap_axis=False, **kwargs) - def shift(self, periods: int, axis: AxisInt, fill_value) -> Self: if fill_value is lib.no_default: fill_value = None @@ -375,15 +336,6 @@ def shift(self, periods: int, axis: AxisInt, fill_value) -> Self: "shift", periods=periods, axis=axis, fill_value=fill_value ) - def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: - if limit is not None: - # Do this validation even if we go through one of the no-op paths - limit = libalgos.validate_limit(None, limit=limit) - - return self.apply_with_block( - "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast - ) - def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: if copy is None: copy = True @@ -410,36 +362,6 @@ def _convert(arr): return self.apply(_convert) - def replace_regex(self, **kwargs) -> Self: - return self.apply_with_block("_replace_regex", **kwargs) - - def replace(self, to_replace, value, inplace: bool) -> Self: - inplace = validate_bool_kwarg(inplace, "inplace") - assert np.ndim(value) == 0, value - # TODO "replace" is right now implemented on the blocks, we should move - # it to general array algos so it can be reused here - return self.apply_with_block( - "replace", value=value, to_replace=to_replace, inplace=inplace - ) - - def replace_list( - self, - src_list: list[Any], - dest_list: list[Any], - inplace: bool = False, - regex: bool = False, - ) -> Self: - """do a list replace""" - inplace = validate_bool_kwarg(inplace, "inplace") - - return self.apply_with_block( - "replace_list", - src_list=src_list, - dest_list=dest_list, - inplace=inplace, - regex=regex, - ) - def to_native_types(self, **kwargs) -> Self: return self.apply(to_native_types, **kwargs) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 8f771221c8890..14aa2cc2716b9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -6,13 +6,21 @@ from typing import ( TYPE_CHECKING, + Any, Literal, final, ) import numpy as np +from pandas._config import using_copy_on_write + +from pandas._libs import ( + algos as libalgos, + lib, +) from pandas.errors import AbstractMethodError +from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( find_common_type, @@ -20,6 +28,7 @@ ) from pandas.core.base import PandasObject +from pandas.core.construction import extract_array from pandas.core.indexes.api import ( Index, default_index, @@ -138,10 +147,128 @@ def apply( ) -> Self: raise AbstractMethodError(self) + def apply_with_block( + self, + f, + align_keys: list[str] | None = None, + **kwargs, + ) -> Self: + raise AbstractMethodError(self) + @final def isna(self, func) -> Self: return self.apply("apply", func=func) + @final + def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + + return self.apply_with_block( + "fillna", + value=value, + limit=limit, + inplace=inplace, + downcast=downcast, + using_cow=using_copy_on_write(), + ) + + @final + def where(self, other, cond, align: bool) -> Self: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply_with_block( + "where", + align_keys=align_keys, + other=other, + cond=cond, + using_cow=using_copy_on_write(), + ) + + @final + def putmask(self, mask, new, align: bool = True) -> Self: + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + return self.apply_with_block( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + using_cow=using_copy_on_write(), + ) + + @final + def round(self, decimals: int, using_cow: bool = False) -> Self: + return self.apply_with_block( + "round", + decimals=decimals, + using_cow=using_cow, + ) + + @final + def replace(self, to_replace, value, inplace: bool) -> Self: + inplace = validate_bool_kwarg(inplace, "inplace") + # NDFrame.replace ensures the not-is_list_likes here + assert not lib.is_list_like(to_replace) + assert not lib.is_list_like(value) + return self.apply_with_block( + "replace", + to_replace=to_replace, + value=value, + inplace=inplace, + using_cow=using_copy_on_write(), + ) + + @final + def replace_regex(self, **kwargs) -> Self: + return self.apply_with_block( + "_replace_regex", **kwargs, using_cow=using_copy_on_write() + ) + + @final + def replace_list( + self, + src_list: list[Any], + dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> Self: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + bm = self.apply_with_block( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + using_cow=using_copy_on_write(), + ) + bm._consolidate_inplace() + return bm + + def interpolate(self, inplace: bool, **kwargs) -> Self: + return self.apply_with_block( + "interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write() + ) + + def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: + return self.apply_with_block( + "pad_or_backfill", + inplace=inplace, + **kwargs, + using_cow=using_copy_on_write(), + ) + # -------------------------------------------------------------------- # Consolidation: No-ops for all but BlockManager diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6caca7cbe71dc..f22afbdb68d65 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,7 +3,6 @@ import itertools from typing import ( TYPE_CHECKING, - Any, Callable, Hashable, Literal, @@ -18,7 +17,6 @@ from pandas._config import using_copy_on_write from pandas._libs import ( - algos as libalgos, internals as libinternals, lib, ) @@ -29,7 +27,6 @@ from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level -from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( @@ -358,27 +355,8 @@ def apply( out = type(self).from_blocks(result_blocks, self.axes) return out - def where(self, other, cond, align: bool) -> Self: - if align: - align_keys = ["other", "cond"] - else: - align_keys = ["cond"] - other = extract_array(other, extract_numpy=True) - - return self.apply( - "where", - align_keys=align_keys, - other=other, - cond=cond, - using_cow=using_copy_on_write(), - ) - - def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply( - "round", - decimals=decimals, - using_cow=using_cow, - ) + # Alias so we can share code with ArrayManager + apply_with_block = apply def setitem(self, indexer, value) -> Self: """ @@ -396,38 +374,10 @@ def setitem(self, indexer, value) -> Self: return self.apply("setitem", indexer=indexer, value=value) - def putmask(self, mask, new, align: bool = True) -> Self: - if align: - align_keys = ["new", "mask"] - else: - align_keys = ["mask"] - new = extract_array(new, extract_numpy=True) - - return self.apply( - "putmask", - align_keys=align_keys, - mask=mask, - new=new, - using_cow=using_copy_on_write(), - ) - def diff(self, n: int) -> Self: # only reached with self.ndim == 2 return self.apply("diff", n=n) - def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "pad_or_backfill", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - ) - - def interpolate(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write() - ) - def shift(self, periods: int, axis: AxisInt, fill_value) -> Self: axis = self._normalize_axis(axis) if fill_value is lib.no_default: @@ -435,20 +385,6 @@ def shift(self, periods: int, axis: AxisInt, fill_value) -> Self: return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) - def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: - if limit is not None: - # Do this validation even if we go through one of the no-op paths - limit = libalgos.validate_limit(None, limit=limit) - - return self.apply( - "fillna", - value=value, - limit=limit, - inplace=inplace, - downcast=downcast, - using_cow=using_copy_on_write(), - ) - def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: if copy is None: if using_copy_on_write(): @@ -477,43 +413,6 @@ def convert(self, copy: bool | None) -> Self: return self.apply("convert", copy=copy, using_cow=using_copy_on_write()) - def replace(self, to_replace, value, inplace: bool) -> Self: - inplace = validate_bool_kwarg(inplace, "inplace") - # NDFrame.replace ensures the not-is_list_likes here - assert not is_list_like(to_replace) - assert not is_list_like(value) - return self.apply( - "replace", - to_replace=to_replace, - value=value, - inplace=inplace, - using_cow=using_copy_on_write(), - ) - - def replace_regex(self, **kwargs) -> Self: - return self.apply("_replace_regex", **kwargs, using_cow=using_copy_on_write()) - - def replace_list( - self, - src_list: list[Any], - dest_list: list[Any], - inplace: bool = False, - regex: bool = False, - ) -> Self: - """do a list replace""" - inplace = validate_bool_kwarg(inplace, "inplace") - - bm = self.apply( - "replace_list", - src_list=src_list, - dest_list=dest_list, - inplace=inplace, - regex=regex, - using_cow=using_copy_on_write(), - ) - bm._consolidate_inplace() - return bm - def to_native_types(self, **kwargs) -> Self: """ Convert values to native types (strings / python objects) that are used