diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 185b0f4da2627..ebdc331a673ab 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -48,7 +48,6 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.concat import concat_categorical, concat_datetime from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -110,7 +109,6 @@ class Block(PandasObject): _can_consolidate = True _verify_integrity = True _validate_ndim = True - _concatenator = staticmethod(np.concatenate) def __init__(self, values, placement, ndim=None): self.ndim = self._check_ndim(values, ndim) @@ -309,16 +307,6 @@ def shape(self): def dtype(self): return self.values.dtype - def concat_same_type(self, to_concat): - """ - Concatenate list of single blocks of the same type. - """ - values = self._concatenator( - [blk.values for blk in to_concat], axis=self.ndim - 1 - ) - placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) - return self.make_block_same_class(values, placement=placement) - def iget(self, i): return self.values[i] @@ -1770,14 +1758,6 @@ def _slice(self, slicer): return self.values[slicer] - def concat_same_type(self, to_concat): - """ - Concatenate list of single blocks of the same type. - """ - values = self._holder._concat_same_type([blk.values for blk in to_concat]) - placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) - return self.make_block_same_class(values, placement=placement) - def fillna(self, value, limit=None, inplace=False, downcast=None): values = self.values if inplace else self.values.copy() values = values.fillna(value=value, limit=limit) @@ -2258,20 +2238,6 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]: new_values = new_values.astype("timedelta64[ns]") return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)] - def concat_same_type(self, to_concat): - # need to handle concat([tz1, tz2]) here, since DatetimeArray - # only handles cases where all the tzs are the same. - # Instead of placing the condition here, it could also go into the - # is_uniform_join_units check, but I'm not sure what is better. - if len({x.dtype for x in to_concat}) > 1: - values = concat_datetime([x.values for x in to_concat]) - - values = values.astype(object, copy=False) - placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) - - return self.make_block(values, placement=placement) - return super().concat_same_type(to_concat) - def fillna(self, value, limit=None, inplace=False, downcast=None): # We support filling a DatetimeTZ with a `value` whose timezone # is different by coercing to object. @@ -2642,7 +2608,6 @@ class CategoricalBlock(ExtensionBlock): is_categorical = True _verify_integrity = True _can_hold_na = True - _concatenator = staticmethod(concat_categorical) should_store = Block.should_store @@ -2656,26 +2621,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical - def concat_same_type(self, to_concat): - """ - Concatenate list of single blocks of the same type. - - Note that this CategoricalBlock._concat_same_type *may* not - return a CategoricalBlock. When the categories in `to_concat` - differ, this will return an object ndarray. - - If / when we decide we don't like that behavior: - - 1. Change Categorical._concat_same_type to use union_categoricals - 2. Delete this method. - """ - values = self._concatenator( - [blk.values for blk in to_concat], axis=self.ndim - 1 - ) - placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) - # not using self.make_block_same_class as values can be object dtype - return self.make_block(values, placement=placement) - def replace( self, to_replace, diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 720e6799a3bf3..37e081aeba3f6 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -1,6 +1,7 @@ # TODO: Needs a better name; too many modules are already called "concat" from collections import defaultdict import copy +from typing import List import numpy as np @@ -61,8 +62,18 @@ def concatenate_block_managers( values = values.view() b = b.make_block_same_class(values, placement=placement) elif _is_uniform_join_units(join_units): - b = join_units[0].block.concat_same_type([ju.block for ju in join_units]) - b.mgr_locs = placement + blk = join_units[0].block + vals = [ju.block.values for ju in join_units] + + if not blk.is_extension or blk.is_datetimetz or blk.is_categorical: + # datetimetz and categorical can have the same type but multiple + # dtypes, concatting does not necessarily preserve dtype + values = concat_compat(vals, axis=blk.ndim - 1) + else: + # TODO(EA2D): special-casing not needed with 2D EAs + values = concat_compat(vals) + + b = make_block(values, placement=placement, ndim=blk.ndim) else: b = make_block( _concatenate_join_units(join_units, concat_axis, copy=copy), @@ -419,13 +430,15 @@ def _get_empty_dtype_and_na(join_units): raise AssertionError(msg) -def _is_uniform_join_units(join_units) -> bool: +def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool: """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic _concatenate_join_units (which uses `concat_compat`). """ + # TODO: require dtype match in addition to same type? e.g. DatetimeTZBlock + # cannot necessarily join return ( # all blocks need to have the same type all(type(ju.block) is type(join_units[0].block) for ju in join_units) diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 9925fd51561ae..1843126898f3d 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -32,12 +32,6 @@ def df(): return pd.DataFrame(block_manager) -def test_concat_dataframe(df): - # GH17728 - res = pd.concat([df, df]) - assert isinstance(res._mgr.blocks[1], CustomBlock) - - def test_concat_axis1(df): # GH17954 df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]})