diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 98afc5ac3a0e3..c59499e1bee5c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -3,12 +3,12 @@ import itertools import operator import re -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -58,6 +58,8 @@ # TODO: flexible with index=None and/or items=None +T = TypeVar("T", bound="BlockManager") + class BlockManager(PandasObject): """ @@ -149,6 +151,13 @@ def __init__( self._blknos = None self._blklocs = None + @classmethod + def from_blocks(cls, blocks: List[Block], axes: List[Index]): + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + return cls(blocks, axes, do_integrity_check=False) + @property def blknos(self): """ @@ -176,7 +185,7 @@ def blklocs(self): return self._blklocs - def make_empty(self, axes=None) -> "BlockManager": + def make_empty(self: T, axes=None) -> T: """ return an empty BlockManager with the items axis of len 0 """ if axes is None: axes = [Index([])] + self.axes[1:] @@ -184,10 +193,11 @@ def make_empty(self, axes=None) -> "BlockManager": # preserve dtype if possible if self.ndim == 1: assert isinstance(self, SingleBlockManager) # for mypy - blocks = np.array([], dtype=self.array_dtype) + arr = np.array([], dtype=self.array_dtype) + blocks = [make_block(arr, placement=slice(0, 0), ndim=1)] else: blocks = [] - return type(self)(blocks, axes) + return type(self).from_blocks(blocks, axes) def __nonzero__(self) -> bool: return True @@ -380,7 +390,7 @@ def reduce(self, func, *args, **kwargs): return res - def apply(self, f, filter=None, **kwargs) -> "BlockManager": + def apply(self: T, f, filter=None, **kwargs) -> T: """ Iterate over the blocks, collect and create a new BlockManager. @@ -458,8 +468,8 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": if len(result_blocks) == 0: return self.make_empty(self.axes) - bm = type(self)(result_blocks, self.axes, do_integrity_check=False) - return bm + + return type(self).from_blocks(result_blocks, self.axes) def quantile( self, @@ -658,7 +668,7 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = type(self)(result_blocks, self.axes) + bm = type(self).from_blocks(result_blocks, self.axes) bm._consolidate_inplace() return bm @@ -747,7 +757,7 @@ def combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager": axes = list(self.axes) axes[0] = self.items.take(indexer) - return type(self)(new_blocks, axes, do_integrity_check=False) + return type(self).from_blocks(new_blocks, axes) def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": @@ -774,7 +784,7 @@ def __contains__(self, item) -> bool: def nblocks(self) -> int: return len(self.blocks) - def copy(self, deep=True) -> "BlockManager": + def copy(self: T, deep=True) -> T: """ Make deep or shallow copy of BlockManager @@ -1244,14 +1254,14 @@ def reindex_axis( ) def reindex_indexer( - self, + self: T, new_axis, indexer, axis: int, fill_value=None, - allow_dups=False, + allow_dups: bool = False, copy: bool = True, - ): + ) -> T: """ Parameters ---------- @@ -1299,7 +1309,8 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - return type(self)(new_blocks, new_axes) + + return type(self).from_blocks(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1500,6 +1511,8 @@ def __init__( do_integrity_check: bool = False, fastpath: bool = False, ): + assert isinstance(block, Block), type(block) + if isinstance(axis, list): if len(axis) != 1: raise ValueError( @@ -1510,38 +1523,29 @@ def __init__( # passed from constructor, single block, single axis if fastpath: self.axes = [axis] - if isinstance(block, list): - - # empty block - if len(block) == 0: - block = [np.array([])] - elif len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] else: self.axes = [ensure_index(axis)] - # create the block here - if isinstance(block, list): - - # provide consolidation to the interleaved_dtype - if len(block) > 1: - dtype = _interleaved_dtype(block) - block = [b.astype(dtype) for b in block] - block = _consolidate(block) - - if len(block) != 1: - raise ValueError( - "Cannot create SingleBlockManager with more than 1 block" - ) - block = block[0] + self.blocks = tuple([block]) - if not isinstance(block, Block): - block = make_block(block, placement=slice(0, len(axis)), ndim=1) + @classmethod + def from_blocks( + cls, blocks: List[Block], axes: List[Index] + ) -> "SingleBlockManager": + """ + Constructor for BlockManager and SingleBlockManager with same signature. + """ + assert len(blocks) == 1 + assert len(axes) == 1 + return cls(blocks[0], axes[0], do_integrity_check=False, fastpath=True) - self.blocks = tuple([block]) + @classmethod + def from_array(cls, array: ArrayLike, index: Index) -> "SingleBlockManager": + """ + Constructor for if we have an array that is not yet a Block. + """ + block = make_block(array, placement=slice(0, len(index)), ndim=1) + return cls(block, index, fastpath=True) def _post_setstate(self): pass @@ -1568,7 +1572,10 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "SingleBlockManager": if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) + blk = self._block + array = blk._slice(slobj) + block = blk.make_block_same_class(array, placement=range(len(array))) + return type(self)(block, self.index[slobj], fastpath=True) @property def index(self) -> Index: @@ -1630,7 +1637,7 @@ def fast_xs(self, loc): """ raise NotImplementedError("Use series._values[loc] instead") - def concat(self, to_concat, new_axis) -> "SingleBlockManager": + def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager": """ Concatenate a list of SingleBlockManagers into a single SingleBlockManager. diff --git a/pandas/core/series.py b/pandas/core/series.py index 568e99622dd29..c44c72077aa29 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -205,7 +205,7 @@ def __init__( # data is an ndarray, index is defined if not isinstance(data, SingleBlockManager): - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) if copy: data = data.copy() if index is None: @@ -317,7 +317,7 @@ def __init__( else: data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True) - data = SingleBlockManager(data, index, fastpath=True) + data = SingleBlockManager.from_array(data, index) generic.NDFrame.__init__(self, data) self.name = name diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 6311070cfe2bb..8a8dac54cf96a 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas.core.internals import BlockManager +from pandas.core.internals import BlockManager, SingleBlockManager from pandas.core.internals.blocks import Block, NonConsolidatableMixIn @@ -36,7 +36,8 @@ def test_concat_series(): # GH17728 values = np.arange(3, dtype="int64") block = CustomBlock(values, placement=slice(0, 3)) - s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + mgr = SingleBlockManager(block, pd.RangeIndex(3)) + s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True) res = pd.concat([s, s]) assert isinstance(res._data.blocks[0], CustomBlock)