diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 2630c07814bb2..59301391a7dad 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -6,7 +6,16 @@
 
 import numpy as np
 
-from pandas._libs import NaT, algos as libalgos, internals as libinternals, lib, writers
+from pandas._libs import (
+    Interval,
+    NaT,
+    Period,
+    Timestamp,
+    algos as libalgos,
+    internals as libinternals,
+    lib,
+    writers,
+)
 from pandas._libs.internals import BlockPlacement
 from pandas._libs.tslibs import conversion
 from pandas._libs.tslibs.timezones import tz_compare
@@ -41,17 +50,15 @@
     is_float_dtype,
     is_integer,
     is_integer_dtype,
-    is_interval_dtype,
     is_list_like,
     is_object_dtype,
-    is_period_dtype,
     is_re,
     is_re_compilable,
     is_sparse,
     is_timedelta64_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
 from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat
 
@@ -2629,36 +2636,38 @@ def get_block_type(values, dtype=None):
     -------
     cls : class, subclass of Block
     """
+    # We use vtype and kind checks because they are much more performant
+    # than is_foo_dtype
     dtype = dtype or values.dtype
     vtype = dtype.type
+    kind = dtype.kind
 
     cls: Type[Block]
     if is_sparse(dtype):
         # Need this first(ish) so that Sparse[datetime] is sparse
         cls = ExtensionBlock
-    elif is_categorical_dtype(values.dtype):
+    elif isinstance(dtype, CategoricalDtype):
         cls = CategoricalBlock
-    elif issubclass(vtype, np.datetime64):
-        assert not is_datetime64tz_dtype(values.dtype)
-        cls = DatetimeBlock
-    elif is_datetime64tz_dtype(values.dtype):
+    elif vtype is Timestamp:
         cls = DatetimeTZBlock
-    elif is_interval_dtype(dtype) or is_period_dtype(dtype):
+    elif vtype is Interval or vtype is Period:
         cls = ObjectValuesExtensionBlock
-    elif is_extension_array_dtype(values.dtype):
+    elif isinstance(dtype, ExtensionDtype):
         # Note: need to be sure PandasArray is unwrapped before we get here
         cls = ExtensionBlock
-    elif issubclass(vtype, np.floating):
-        cls = FloatBlock
-    elif issubclass(vtype, np.timedelta64):
-        assert issubclass(vtype, np.integer)
+
+    elif kind == "M":
+        cls = DatetimeBlock
+    elif kind == "m":
         cls = TimeDeltaBlock
-    elif issubclass(vtype, np.complexfloating):
+    elif kind == "f":
+        cls = FloatBlock
+    elif kind == "c":
         cls = ComplexBlock
-    elif issubclass(vtype, np.integer):
+    elif kind == "i" or kind == "u":
         cls = IntBlock
-    elif dtype == np.bool_:
+    elif kind == "b":
         cls = BoolBlock
     else:
         cls = ObjectBlock
     return cls
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 06de1972b4c9a..dd3a04ccb38e2 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -70,14 +70,21 @@ def concatenate_block_managers(
             vals = [ju.block.values for ju in join_units]
 
             if not blk.is_extension:
-                values = concat_compat(vals, axis=blk.ndim - 1)
+                # _is_uniform_join_units ensures a single dtype, so
+                # we can use np.concatenate, which is more performant
+                # than concat_compat
+                values = np.concatenate(vals, axis=blk.ndim - 1)
             else:
                 # TODO(EA2D): special-casing not needed with 2D EAs
                 values = concat_compat(vals)
                 if not isinstance(values, ExtensionArray):
                     values = values.reshape(1, len(values))
 
-            b = make_block(values, placement=placement, ndim=blk.ndim)
+            if blk.values.dtype == values.dtype:
+                # Fast-path
+                b = blk.make_block_same_class(values, placement=placement)
+            else:
+                b = make_block(values, placement=placement, ndim=blk.ndim)
         else:
             b = make_block(
                 _concatenate_join_units(join_units, concat_axis, copy=copy),
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 4a2629daf63d7..42b541bd4cb02 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -18,6 +18,7 @@
 import numpy as np
 
 from pandas._typing import FrameOrSeriesUnion, Label
+from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
@@ -360,7 +361,7 @@ def __init__(
         if len(objs) == 0:
             raise ValueError("All objects passed were None")
 
-        # consolidate data & figure out what our result ndim is going to be
+        # figure out what our result ndim is going to be
        ndims = set()
         for obj in objs:
             if not isinstance(obj, (ABCSeries, ABCDataFrame)):
@@ -370,8 +371,6 @@ def __init__(
                 )
                 raise TypeError(msg)
 
-            # consolidate
-            obj._consolidate_inplace()
             ndims.add(obj.ndim)
 
         # get the sample
@@ -543,7 +542,7 @@ def _get_result_dim(self) -> int:
 
     def _get_new_axes(self) -> List[Index]:
         ndim = self._get_result_dim()
         return [
-            self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
+            self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
             for i in range(ndim)
         ]
 
@@ -557,6 +556,7 @@ def _get_comb_axis(self, i: int) -> Index:
             copy=self.copy,
         )
 
+    @cache_readonly
     def _get_concat_axis(self) -> Index:
         """
         Return index to be used along concatenation axis.
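
As a quick standalone illustration (not part of the patch; the specific dtypes below are only examples), the sketch prints the numpy kind codes that the reworked get_block_type branches on, and checks that the extension dtypes expose the scalar types (Timestamp, Interval, Period) that the earlier "vtype is ..." comparisons rely on:

    import numpy as np
    import pandas as pd

    # numpy "kind" codes used by the new branch order in get_block_type:
    #   "M" datetime64, "m" timedelta64, "f" float, "c" complex,
    #   "i"/"u" signed/unsigned integer, "b" bool
    for dtype in map(np.dtype, ["datetime64[ns]", "timedelta64[ns]", "float64",
                                "complex128", "int64", "uint8", "bool"]):
        print(f"{dtype!s:>15} -> kind {dtype.kind!r}")

    # Extension dtypes are dispatched before the kind checks via dtype.type,
    # so they can be compared against the scalar types directly.
    print(pd.DatetimeTZDtype(tz="UTC").type is pd.Timestamp)   # True
    print(pd.IntervalDtype("int64").type is pd.Interval)       # True
    print(pd.PeriodDtype("D").type is pd.Period)               # True

Checking dtype.kind and dtype.type is a plain attribute lookup plus an identity/string comparison, which is the cheaper alternative to the is_foo_dtype helpers the patch removes from this code path.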