From 4dff5b96db1930afbb17e5c47a22c7a27a758402 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 Mar 2021 13:52:32 -0800 Subject: [PATCH 1/4] REF: de-duplicate interleaved_dtype --- pandas/core/internals/array_manager.py | 27 +++------------------- pandas/core/internals/base.py | 23 +++++++++++++++++++ pandas/core/internals/blocks.py | 2 +- pandas/core/internals/managers.py | 31 +++++--------------------- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 97a2d4037bf26..7e195cadffe54 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -22,14 +22,12 @@ ) from pandas._typing import ( ArrayLike, - DtypeObj, Hashable, ) from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( astype_array_safe, - find_common_type, infer_dtype_from_scalar, soft_convert_objects, ) @@ -84,6 +82,7 @@ from pandas.core.internals.base import ( DataManager, SingleDataManager, + interleaved_dtype, ) from pandas.core.internals.blocks import new_block @@ -752,7 +751,7 @@ def as_array( copy = copy or na_value is not lib.no_default if not dtype: - dtype = _interleaved_dtype(self.arrays) + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) if isinstance(dtype, SparseDtype): dtype = dtype.subtype @@ -800,7 +799,7 @@ def fast_xs(self, loc: int) -> ArrayLike: ------- np.ndarray or ExtensionArray """ - dtype = _interleaved_dtype(self.arrays) + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) values = [arr[loc] for arr in self.arrays] if isinstance(dtype, ExtensionDtype): @@ -1096,26 +1095,6 @@ def unstack(self, unstacker, fill_value) -> ArrayManager: # TODO # equals # to_dict - # quantile - - -def _interleaved_dtype(blocks) -> Optional[DtypeObj]: - """ - Find the common dtype for `blocks`. 
- - Parameters - ---------- - blocks : List[Block] - - Returns - ------- - dtype : np.dtype, ExtensionDtype, or None - None is returned when `blocks` is empty. - """ - if not len(blocks): - return None - - return find_common_type([b.dtype for b in blocks]) class SingleArrayManager(ArrayManager, SingleDataManager): diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 0e4b5ce2e7452..57e0fec310ba1 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -4,11 +4,15 @@ """ from typing import ( List, + Optional, TypeVar, ) +from pandas._typing import DtypeObj from pandas.errors import AbstractMethodError +from pandas.core.dtypes.cast import find_common_type + from pandas.core.base import PandasObject from pandas.core.indexes.api import ( Index, @@ -102,3 +106,22 @@ def equals(self, other: object) -> bool: class SingleDataManager(DataManager): ndim = 1 + + +def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]: + """ + Find the common dtype for `dtypes`. + + Parameters + ---------- + dtypes : List[DtypeObj] + + Returns + ------- + dtype : np.dtype, ExtensionDtype, or None + None is returned when `dtypes` is empty. 
+ """ + if not len(dtypes): + return None + + return find_common_type(dtypes) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e084db77692f5..166cdc939be03 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -512,7 +512,7 @@ def _split(self) -> List[Block]: for i, ref_loc in enumerate(self.mgr_locs): vals = self.values[slice(i, i + 1)] - nb = self.make_block(vals, [ref_loc]) + nb = self.make_block(vals, BlockPlacement(ref_loc)) new_blocks.append(nb) return new_blocks diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b656c9e83e1a8..426b7de44130b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -32,10 +32,7 @@ from pandas.errors import PerformanceWarning from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.cast import ( - find_common_type, - infer_dtype_from_scalar, -) +from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( DT64NS_DTYPE, is_dtype_equal, @@ -64,6 +61,7 @@ from pandas.core.internals.base import ( DataManager, SingleDataManager, + interleaved_dtype, ) from pandas.core.internals.blocks import ( Block, @@ -916,7 +914,7 @@ def _interleave( Items must be contained in the blocks """ if not dtype: - dtype = _interleaved_dtype(self.blocks) + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) # TODO: https://github.com/pandas-dev/pandas/issues/22791 # Give EAs some input on what happens here. Sparse needs this. 
@@ -981,7 +979,7 @@ def fast_xs(self, loc: int) -> ArrayLike: if len(self.blocks) == 1: return self.blocks[0].iget((slice(None), loc)) - dtype = _interleaved_dtype(self.blocks) + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) n = len(self) if is_extension_array_dtype(dtype): @@ -1320,7 +1318,7 @@ def reindex_indexer( new_blocks = [ blk.take_nd( indexer, - axis=axis, + axis=1, fill_value=( fill_value if fill_value is not None else blk.fill_value ), @@ -1892,25 +1890,6 @@ def _stack_arrays(tuples, dtype: np.dtype): return stacked, placement -def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]: - """ - Find the common dtype for `blocks`. - - Parameters - ---------- - blocks : List[Block] - - Returns - ------- - dtype : np.dtype, ExtensionDtype, or None - None is returned when `blocks` is empty. - """ - if not len(blocks): - return None - - return find_common_type([b.dtype for b in blocks]) - - def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]: """ Merge blocks having same dtype, exclude non-consolidating blocks From 446a5b8d0a232a2796123621ff03afeb186fc008 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 Mar 2021 17:48:27 -0800 Subject: [PATCH 2/4] REF: share code ArrayManager/BlockManager --- pandas/core/internals/array_manager.py | 14 ------------- pandas/core/internals/base.py | 27 +++++++++++++++++++++++++- pandas/core/internals/managers.py | 16 --------------- 3 files changed, 26 insertions(+), 31 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 7e195cadffe54..5c8fb99dde5ac 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -154,22 +154,11 @@ def axes(self) -> List[Index]: # type: ignore[override] """Axes is BlockManager-compatible order (columns, rows)""" return [self._axes[1], self._axes[0]] - @property - def shape(self) -> Tuple[int, ...]: - # this still gives the BlockManager-compatible transposed shape - 
return tuple(len(ax) for ax in self.axes) - @property def shape_proper(self) -> Tuple[int, ...]: # this returns (n_rows, n_columns) return tuple(len(ax) for ax in self._axes) - @staticmethod - def _normalize_axis(axis): - # switch axis - axis = 1 if axis == 0 else 0 - return axis - def set_axis( self, axis: int, new_labels: Index, verify_integrity: bool = True ) -> None: @@ -510,9 +499,6 @@ def quantile( axes = [qs, self._axes[1]] return type(self)(new_arrs, axes) - def isna(self, func) -> ArrayManager: - return self.apply("apply", func=func) - def where(self, other, cond, align: bool, errors: str, axis: int) -> ArrayManager: if align: align_keys = ["other", "cond"] diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 57e0fec310ba1..0a56f993447e6 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -8,7 +8,10 @@ TypeVar, ) -from pandas._typing import DtypeObj +from pandas._typing import ( + DtypeObj, + Shape, +) from pandas.errors import AbstractMethodError from pandas.core.dtypes.cast import find_common_type @@ -39,6 +42,16 @@ def __len__(self) -> int: def ndim(self) -> int: return len(self.axes) + @property + def shape(self) -> Shape: + return tuple(len(ax) for ax in self.axes) + + def _normalize_axis(self, axis: int) -> int: + # switch axis + if self.ndim == 2: + axis = 1 if axis == 0 else 0 + return axis + def reindex_indexer( self: T, new_axis, @@ -103,6 +116,18 @@ def equals(self, other: object) -> bool: return self._equal_values(other) + def apply( + self: T, + f, + align_keys: Optional[List[str]] = None, + ignore_failures: bool = False, + **kwargs, + ) -> T: + raise AbstractMethodError(self) + + def isna(self: T, func) -> T: + return self.apply("apply", func=func) + class SingleDataManager(DataManager): ndim = 1 diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 426b7de44130b..8e461eeda7196 100644 --- a/pandas/core/internals/managers.py +++ 
b/pandas/core/internals/managers.py @@ -245,16 +245,6 @@ def __nonzero__(self) -> bool: # Python3 compat __bool__ = __nonzero__ - @property - def shape(self) -> Shape: - return tuple(len(ax) for ax in self.axes) - - def _normalize_axis(self, axis): - # switch axis to follow BlockManager logic - if self.ndim == 2: - axis = 1 if axis == 0 else 0 - return axis - def set_axis( self, axis: int, new_labels: Index, verify_integrity: bool = True ) -> None: @@ -360,9 +350,6 @@ def _post_setstate(self) -> None: self._known_consolidated = False self._rebuild_blknos_and_blklocs() - def __len__(self) -> int: - return len(self.items) - def __repr__(self) -> str: output = type(self).__name__ for i, ax in enumerate(self.axes): @@ -574,9 +561,6 @@ def quantile( return type(self)(blocks, new_axes) - def isna(self, func) -> BlockManager: - return self.apply("apply", func=func) - def where(self, other, cond, align: bool, errors: str, axis: int) -> BlockManager: axis = self._normalize_axis(axis) if align: From a0d602d5fbc797f01479eb37223efa4a89ceab21 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 Mar 2021 07:42:25 -0800 Subject: [PATCH 3/4] revert normalize_axis move --- pandas/core/internals/array_manager.py | 6 ++++++ pandas/core/internals/base.py | 6 ------ pandas/core/internals/managers.py | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 1d56f1fe49f9c..0a1317dd81a96 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -162,6 +162,12 @@ def shape_proper(self) -> Tuple[int, ...]: # this returns (n_rows, n_columns) return tuple(len(ax) for ax in self._axes) + @staticmethod + def _normalize_axis(axis: int) -> int: + # switch axis + axis = 1 if axis == 0 else 0 + return axis + def set_axis( self, axis: int, new_labels: Index, verify_integrity: bool = True ) -> None: diff --git a/pandas/core/internals/base.py 
b/pandas/core/internals/base.py index 0a56f993447e6..c5bb2283d23e4 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -46,12 +46,6 @@ def ndim(self) -> int: def shape(self) -> Shape: return tuple(len(ax) for ax in self.axes) - def _normalize_axis(self, axis: int) -> int: - # switch axis - if self.ndim == 2: - axis = 1 if axis == 0 else 0 - return axis - def reindex_indexer( self: T, new_axis, diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b8bae59445ce3..6d84e6d822798 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -245,6 +245,12 @@ def __nonzero__(self) -> bool: # Python3 compat __bool__ = __nonzero__ + def _normalize_axis(self, axis: int) -> int: + # switch axis + if self.ndim == 2: + axis = 1 if axis == 0 else 0 + return axis + def set_axis( self, axis: int, new_labels: Index, verify_integrity: bool = True ) -> None: From dd30715ecfd3157b86ab3aa1efc9cee9b4760464 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 11 Mar 2021 10:24:58 +0100 Subject: [PATCH 4/4] Update pandas/core/internals/managers.py --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6d84e6d822798..ddd7b9a7e6784 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -246,7 +246,7 @@ def __nonzero__(self) -> bool: __bool__ = __nonzero__ def _normalize_axis(self, axis: int) -> int: - # switch axis + # switch axis to follow BlockManager logic if self.ndim == 2: axis = 1 if axis == 0 else 0 return axis