Skip to content

Commit 76a5a4f

Browse files
authored
CLN: dont consolidate in reshape.concat (#34683)
1 parent f197ca5 commit 76a5a4f

File tree

3 files changed

+40
-24
lines changed

3 files changed

+40
-24
lines changed

pandas/core/internals/blocks.py

+27-18
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,16 @@
66

77
import numpy as np
88

9-
from pandas._libs import NaT, algos as libalgos, internals as libinternals, lib, writers
9+
from pandas._libs import (
10+
Interval,
11+
NaT,
12+
Period,
13+
Timestamp,
14+
algos as libalgos,
15+
internals as libinternals,
16+
lib,
17+
writers,
18+
)
1019
from pandas._libs.internals import BlockPlacement
1120
from pandas._libs.tslibs import conversion
1221
from pandas._libs.tslibs.timezones import tz_compare
@@ -41,17 +50,15 @@
4150
is_float_dtype,
4251
is_integer,
4352
is_integer_dtype,
44-
is_interval_dtype,
4553
is_list_like,
4654
is_object_dtype,
47-
is_period_dtype,
4855
is_re,
4956
is_re_compilable,
5057
is_sparse,
5158
is_timedelta64_dtype,
5259
pandas_dtype,
5360
)
54-
from pandas.core.dtypes.dtypes import ExtensionDtype
61+
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
5562
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
5663
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat
5764

@@ -2629,36 +2636,38 @@ def get_block_type(values, dtype=None):
26292636
-------
26302637
cls : class, subclass of Block
26312638
"""
2639+
# We use vtype and kind checks because they are much more performant
2640+
# than is_foo_dtype
26322641
dtype = dtype or values.dtype
26332642
vtype = dtype.type
2643+
kind = dtype.kind
26342644

26352645
cls: Type[Block]
26362646

26372647
if is_sparse(dtype):
26382648
# Need this first(ish) so that Sparse[datetime] is sparse
26392649
cls = ExtensionBlock
2640-
elif is_categorical_dtype(values.dtype):
2650+
elif isinstance(dtype, CategoricalDtype):
26412651
cls = CategoricalBlock
2642-
elif issubclass(vtype, np.datetime64):
2643-
assert not is_datetime64tz_dtype(values.dtype)
2644-
cls = DatetimeBlock
2645-
elif is_datetime64tz_dtype(values.dtype):
2652+
elif vtype is Timestamp:
26462653
cls = DatetimeTZBlock
2647-
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
2654+
elif vtype is Interval or vtype is Period:
26482655
cls = ObjectValuesExtensionBlock
2649-
elif is_extension_array_dtype(values.dtype):
2656+
elif isinstance(dtype, ExtensionDtype):
26502657
# Note: need to be sure PandasArray is unwrapped before we get here
26512658
cls = ExtensionBlock
2652-
elif issubclass(vtype, np.floating):
2653-
cls = FloatBlock
2654-
elif issubclass(vtype, np.timedelta64):
2655-
assert issubclass(vtype, np.integer)
2659+
2660+
elif kind == "M":
2661+
cls = DatetimeBlock
2662+
elif kind == "m":
26562663
cls = TimeDeltaBlock
2657-
elif issubclass(vtype, np.complexfloating):
2664+
elif kind == "f":
2665+
cls = FloatBlock
2666+
elif kind == "c":
26582667
cls = ComplexBlock
2659-
elif issubclass(vtype, np.integer):
2668+
elif kind == "i" or kind == "u":
26602669
cls = IntBlock
2661-
elif dtype == np.bool_:
2670+
elif kind == "b":
26622671
cls = BoolBlock
26632672
else:
26642673
cls = ObjectBlock

pandas/core/internals/concat.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -70,14 +70,21 @@ def concatenate_block_managers(
7070
vals = [ju.block.values for ju in join_units]
7171

7272
if not blk.is_extension:
73-
values = concat_compat(vals, axis=blk.ndim - 1)
73+
# _is_uniform_join_units ensures a single dtype, so
74+
# we can use np.concatenate, which is more performant
75+
# than concat_compat
76+
values = np.concatenate(vals, axis=blk.ndim - 1)
7477
else:
7578
# TODO(EA2D): special-casing not needed with 2D EAs
7679
values = concat_compat(vals)
7780
if not isinstance(values, ExtensionArray):
7881
values = values.reshape(1, len(values))
7982

80-
b = make_block(values, placement=placement, ndim=blk.ndim)
83+
if blk.values.dtype == values.dtype:
84+
# Fast-path
85+
b = blk.make_block_same_class(values, placement=placement)
86+
else:
87+
b = make_block(values, placement=placement, ndim=blk.ndim)
8188
else:
8289
b = make_block(
8390
_concatenate_join_units(join_units, concat_axis, copy=copy),

pandas/core/reshape/concat.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
import numpy as np
1919

2020
from pandas._typing import FrameOrSeriesUnion, Label
21+
from pandas.util._decorators import cache_readonly
2122

2223
from pandas.core.dtypes.concat import concat_compat
2324
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
@@ -360,7 +361,7 @@ def __init__(
360361
if len(objs) == 0:
361362
raise ValueError("All objects passed were None")
362363

363-
# consolidate data & figure out what our result ndim is going to be
364+
# figure out what our result ndim is going to be
364365
ndims = set()
365366
for obj in objs:
366367
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
@@ -370,8 +371,6 @@ def __init__(
370371
)
371372
raise TypeError(msg)
372373

373-
# consolidate
374-
obj._consolidate_inplace()
375374
ndims.add(obj.ndim)
376375

377376
# get the sample
@@ -543,7 +542,7 @@ def _get_result_dim(self) -> int:
543542
def _get_new_axes(self) -> List[Index]:
544543
ndim = self._get_result_dim()
545544
return [
546-
self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
545+
self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
547546
for i in range(ndim)
548547
]
549548

@@ -557,6 +556,7 @@ def _get_comb_axis(self, i: int) -> Index:
557556
copy=self.copy,
558557
)
559558

559+
@cache_readonly
560560
def _get_concat_axis(self) -> Index:
561561
"""
562562
Return index to be used along concatenation axis.

0 commit comments

Comments
 (0)