Skip to content

REF: share code ArrayManager/BlockManager #40338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 11, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 3 additions & 38 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@
)
from pandas._typing import (
ArrayLike,
DtypeObj,
Hashable,
)
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
astype_array_safe,
find_common_type,
infer_dtype_from_scalar,
soft_convert_objects,
)
Expand Down Expand Up @@ -84,6 +82,7 @@
from pandas.core.internals.base import (
DataManager,
SingleDataManager,
interleaved_dtype,
)
from pandas.core.internals.blocks import new_block

Expand Down Expand Up @@ -155,22 +154,11 @@ def axes(self) -> List[Index]: # type: ignore[override]
"""Axes is BlockManager-compatible order (columns, rows)"""
return [self._axes[1], self._axes[0]]

@property
def shape(self) -> Tuple[int, ...]:
    """Return a tuple holding the length of every entry in ``self.axes``."""
    # this still gives the BlockManager-compatible transposed shape
    return tuple(map(len, self.axes))

@property
def shape_proper(self) -> Tuple[int, ...]:
    """Return a tuple holding the length of every entry in ``self._axes``."""
    # this returns (n_rows, n_columns)
    return tuple(map(len, self._axes))

@staticmethod
def _normalize_axis(axis):
    """Swap the axis number: ``0`` becomes ``1``, anything else becomes ``0``."""
    # switch axis
    return 1 if axis == 0 else 0

def set_axis(
self, axis: int, new_labels: Index, verify_integrity: bool = True
) -> None:
Expand Down Expand Up @@ -511,9 +499,6 @@ def quantile(
axes = [qs, self._axes[1]]
return type(self)(new_arrs, axes)

def isna(self, func) -> ArrayManager:
    """
    Forward ``func`` to ``self.apply`` and return whatever that call returns.

    The literal string ``"apply"`` is passed as the first positional argument
    and ``func`` is passed through as the ``func`` keyword.
    """
    return self.apply("apply", func=func)

def where(self, other, cond, align: bool, errors: str, axis: int) -> ArrayManager:
if align:
align_keys = ["other", "cond"]
Expand Down Expand Up @@ -752,7 +737,7 @@ def as_array(
copy = copy or na_value is not lib.no_default

if not dtype:
dtype = _interleaved_dtype(self.arrays)
dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

if isinstance(dtype, SparseDtype):
dtype = dtype.subtype
Expand Down Expand Up @@ -800,7 +785,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
-------
np.ndarray or ExtensionArray
"""
dtype = _interleaved_dtype(self.arrays)
dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

values = [arr[loc] for arr in self.arrays]
if isinstance(dtype, ExtensionDtype):
Expand Down Expand Up @@ -1096,26 +1081,6 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
# TODO
# equals
# to_dict
# quantile


def _interleaved_dtype(blocks) -> Optional[DtypeObj]:
    """
    Find the common dtype for `blocks`.

    Parameters
    ----------
    blocks : sequence of objects with a ``dtype`` attribute
        Only ``len()`` and each element's ``.dtype`` are used, so a list of
        arrays works as well as a list of Block objects.

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `blocks` is empty.
    """
    # An empty input has no dtype to combine, so signal that with None
    # instead of handing an empty list to find_common_type.
    if not len(blocks):
        return None

    return find_common_type([b.dtype for b in blocks])


class SingleArrayManager(ArrayManager, SingleDataManager):
Expand Down
48 changes: 48 additions & 0 deletions pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@
"""
from typing import (
List,
Optional,
TypeVar,
)

from pandas._typing import (
DtypeObj,
Shape,
)
from pandas.errors import AbstractMethodError

from pandas.core.dtypes.cast import find_common_type

from pandas.core.base import PandasObject
from pandas.core.indexes.api import (
Index,
Expand All @@ -35,6 +42,16 @@ def __len__(self) -> int:
def ndim(self) -> int:
return len(self.axes)

@property
def shape(self) -> Shape:
    """Return a tuple holding the length of every entry in ``self.axes``."""
    return tuple(map(len, self.axes))

def _normalize_axis(self, axis: int) -> int:
    """
    Swap the axis number when ``self.ndim`` is 2.

    For ``ndim == 2``, ``0`` becomes ``1`` and any other value becomes ``0``;
    for every other ``ndim`` the axis is returned unchanged.
    """
    if self.ndim != 2:
        return axis
    # switch axis
    return 1 if axis == 0 else 0

def reindex_indexer(
self: T,
new_axis,
Expand Down Expand Up @@ -99,6 +116,37 @@ def equals(self, other: object) -> bool:

return self._equal_values(other)

def apply(
    self: T,
    f,
    align_keys: Optional[List[str]] = None,
    ignore_failures: bool = False,
    **kwargs,
) -> T:
    """
    Placeholder implementation: unconditionally raises ``AbstractMethodError``.

    None of the parameters are used here; they only fix the signature.

    Raises
    ------
    AbstractMethodError
        Always, constructed with ``self``.
    """
    raise AbstractMethodError(self)

def isna(self: T, func) -> T:
    """
    Forward ``func`` to ``self.apply`` and return whatever that call returns.

    The literal string ``"apply"`` is passed as the first positional argument
    and ``func`` is passed through as the ``func`` keyword.
    """
    return self.apply("apply", func=func)


class SingleDataManager(DataManager):
    """DataManager subclass whose ``ndim`` is fixed at 1 on the class."""

    # Class-level constant, so it needs no per-instance computation.
    ndim = 1


def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
    """
    Find the common dtype for `dtypes`.

    Parameters
    ----------
    dtypes : List[DtypeObj]

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `dtypes` is empty.
    """
    # An empty input has no dtype to combine, so signal that with None
    # instead of handing an empty list to find_common_type.
    if not len(dtypes):
        return None

    return find_common_type(dtypes)
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def _split(self) -> List[Block]:
for i, ref_loc in enumerate(self.mgr_locs):
vals = self.values[slice(i, i + 1)]

nb = self.make_block(vals, [ref_loc])
nb = self.make_block(vals, BlockPlacement(ref_loc))
new_blocks.append(nb)
return new_blocks

Expand Down
47 changes: 5 additions & 42 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@
from pandas.errors import PerformanceWarning
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
find_common_type,
infer_dtype_from_scalar,
)
from pandas.core.dtypes.cast import infer_dtype_from_scalar
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
is_dtype_equal,
Expand Down Expand Up @@ -64,6 +61,7 @@
from pandas.core.internals.base import (
DataManager,
SingleDataManager,
interleaved_dtype,
)
from pandas.core.internals.blocks import (
Block,
Expand Down Expand Up @@ -247,16 +245,6 @@ def __nonzero__(self) -> bool:
# Python3 compat
__bool__ = __nonzero__

@property
def shape(self) -> Shape:
    """Return a tuple holding the length of every entry in ``self.axes``."""
    return tuple(map(len, self.axes))

def _normalize_axis(self, axis):
# switch axis to follow BlockManager logic
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you leave this one in their respective classes?
Although the code is the same, the two methods actually serve opposite purposes: they are used in different places in ArrayManager (AM) and BlockManager (BM), which probably means I should have named them differently. Also, as more methods are converted to normal axis logic (xref #40075), the method will change in the AM code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, will update

if self.ndim == 2:
axis = 1 if axis == 0 else 0
return axis

def set_axis(
self, axis: int, new_labels: Index, verify_integrity: bool = True
) -> None:
Expand Down Expand Up @@ -362,9 +350,6 @@ def _post_setstate(self) -> None:
self._known_consolidated = False
self._rebuild_blknos_and_blklocs()

def __len__(self) -> int:
    """Return the number of entries in ``self.items``."""
    return len(self.items)

def __repr__(self) -> str:
output = type(self).__name__
for i, ax in enumerate(self.axes):
Expand Down Expand Up @@ -576,9 +561,6 @@ def quantile(

return type(self)(blocks, new_axes)

def isna(self, func) -> BlockManager:
    """
    Forward ``func`` to ``self.apply`` and return whatever that call returns.

    The literal string ``"apply"`` is passed as the first positional argument
    and ``func`` is passed through as the ``func`` keyword.
    """
    return self.apply("apply", func=func)

def where(self, other, cond, align: bool, errors: str, axis: int) -> BlockManager:
axis = self._normalize_axis(axis)
if align:
Expand Down Expand Up @@ -916,7 +898,7 @@ def _interleave(
Items must be contained in the blocks
"""
if not dtype:
dtype = _interleaved_dtype(self.blocks)
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

# TODO: https://github.com/pandas-dev/pandas/issues/22791
# Give EAs some input on what happens here. Sparse needs this.
Expand Down Expand Up @@ -981,7 +963,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
if len(self.blocks) == 1:
return self.blocks[0].iget((slice(None), loc))

dtype = _interleaved_dtype(self.blocks)
dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

n = len(self)
if is_extension_array_dtype(dtype):
Expand Down Expand Up @@ -1320,7 +1302,7 @@ def reindex_indexer(
new_blocks = [
blk.take_nd(
indexer,
axis=axis,
axis=1,
fill_value=(
fill_value if fill_value is not None else blk.fill_value
),
Expand Down Expand Up @@ -1892,25 +1874,6 @@ def _stack_arrays(tuples, dtype: np.dtype):
return stacked, placement


def _interleaved_dtype(blocks: Sequence[Block]) -> Optional[DtypeObj]:
    """
    Compute the dtype shared by all of `blocks`.

    Parameters
    ----------
    blocks : Sequence[Block]
        Only ``len()`` and each element's ``.dtype`` are used.

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None when `blocks` is empty, otherwise the result of
        ``find_common_type`` over the collected dtypes.
    """
    if len(blocks) == 0:
        return None

    block_dtypes = [blk.dtype for blk in blocks]
    return find_common_type(block_dtypes)


def _consolidate(blocks: Tuple[Block, ...]) -> List[Block]:
"""
Merge blocks having same dtype, exclude non-consolidating blocks
Expand Down