Skip to content

Commit 2a4ec05

Browse files
authored
REF: remove Block.concat_same_type (#33486)
1 parent a942836 commit 2a4ec05

File tree

3 files changed

+16
-64
lines changed

3 files changed

+16
-64
lines changed

pandas/core/internals/blocks.py

-55
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
is_timedelta64_dtype,
4949
pandas_dtype,
5050
)
51-
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
5251
from pandas.core.dtypes.dtypes import ExtensionDtype
5352
from pandas.core.dtypes.generic import (
5453
ABCDataFrame,
@@ -110,7 +109,6 @@ class Block(PandasObject):
110109
_can_consolidate = True
111110
_verify_integrity = True
112111
_validate_ndim = True
113-
_concatenator = staticmethod(np.concatenate)
114112

115113
def __init__(self, values, placement, ndim=None):
116114
self.ndim = self._check_ndim(values, ndim)
@@ -309,16 +307,6 @@ def shape(self):
309307
def dtype(self):
310308
return self.values.dtype
311309

312-
def concat_same_type(self, to_concat):
313-
"""
314-
Concatenate list of single blocks of the same type.
315-
"""
316-
values = self._concatenator(
317-
[blk.values for blk in to_concat], axis=self.ndim - 1
318-
)
319-
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
320-
return self.make_block_same_class(values, placement=placement)
321-
322310
def iget(self, i):
323311
return self.values[i]
324312

@@ -1770,14 +1758,6 @@ def _slice(self, slicer):
17701758

17711759
return self.values[slicer]
17721760

1773-
def concat_same_type(self, to_concat):
1774-
"""
1775-
Concatenate list of single blocks of the same type.
1776-
"""
1777-
values = self._holder._concat_same_type([blk.values for blk in to_concat])
1778-
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
1779-
return self.make_block_same_class(values, placement=placement)
1780-
17811761
def fillna(self, value, limit=None, inplace=False, downcast=None):
17821762
values = self.values if inplace else self.values.copy()
17831763
values = values.fillna(value=value, limit=limit)
@@ -2258,20 +2238,6 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
22582238
new_values = new_values.astype("timedelta64[ns]")
22592239
return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]
22602240

2261-
def concat_same_type(self, to_concat):
2262-
# need to handle concat([tz1, tz2]) here, since DatetimeArray
2263-
# only handles cases where all the tzs are the same.
2264-
# Instead of placing the condition here, it could also go into the
2265-
# is_uniform_join_units check, but I'm not sure what is better.
2266-
if len({x.dtype for x in to_concat}) > 1:
2267-
values = concat_datetime([x.values for x in to_concat])
2268-
2269-
values = values.astype(object, copy=False)
2270-
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
2271-
2272-
return self.make_block(values, placement=placement)
2273-
return super().concat_same_type(to_concat)
2274-
22752241
def fillna(self, value, limit=None, inplace=False, downcast=None):
22762242
# We support filling a DatetimeTZ with a `value` whose timezone
22772243
# is different by coercing to object.
@@ -2642,7 +2608,6 @@ class CategoricalBlock(ExtensionBlock):
26422608
is_categorical = True
26432609
_verify_integrity = True
26442610
_can_hold_na = True
2645-
_concatenator = staticmethod(concat_categorical)
26462611

26472612
should_store = Block.should_store
26482613

@@ -2656,26 +2621,6 @@ def __init__(self, values, placement, ndim=None):
26562621
def _holder(self):
26572622
return Categorical
26582623

2659-
def concat_same_type(self, to_concat):
2660-
"""
2661-
Concatenate list of single blocks of the same type.
2662-
2663-
Note that this CategoricalBlock._concat_same_type *may* not
2664-
return a CategoricalBlock. When the categories in `to_concat`
2665-
differ, this will return an object ndarray.
2666-
2667-
If / when we decide we don't like that behavior:
2668-
2669-
1. Change Categorical._concat_same_type to use union_categoricals
2670-
2. Delete this method.
2671-
"""
2672-
values = self._concatenator(
2673-
[blk.values for blk in to_concat], axis=self.ndim - 1
2674-
)
2675-
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
2676-
# not using self.make_block_same_class as values can be object dtype
2677-
return self.make_block(values, placement=placement)
2678-
26792624
def replace(
26802625
self,
26812626
to_replace,

pandas/core/internals/concat.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# TODO: Needs a better name; too many modules are already called "concat"
22
from collections import defaultdict
33
import copy
4+
from typing import List
45

56
import numpy as np
67

@@ -61,8 +62,18 @@ def concatenate_block_managers(
6162
values = values.view()
6263
b = b.make_block_same_class(values, placement=placement)
6364
elif _is_uniform_join_units(join_units):
64-
b = join_units[0].block.concat_same_type([ju.block for ju in join_units])
65-
b.mgr_locs = placement
65+
blk = join_units[0].block
66+
vals = [ju.block.values for ju in join_units]
67+
68+
if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
69+
# datetimetz and categorical can have the same type but multiple
70+
# dtypes; concatenating does not necessarily preserve dtype
71+
values = concat_compat(vals, axis=blk.ndim - 1)
72+
else:
73+
# TODO(EA2D): special-casing not needed with 2D EAs
74+
values = concat_compat(vals)
75+
76+
b = make_block(values, placement=placement, ndim=blk.ndim)
6677
else:
6778
b = make_block(
6879
_concatenate_join_units(join_units, concat_axis, copy=copy),
@@ -419,13 +430,15 @@ def _get_empty_dtype_and_na(join_units):
419430
raise AssertionError(msg)
420431

421432

422-
def _is_uniform_join_units(join_units) -> bool:
433+
def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
423434
"""
424435
Check if the join units consist of blocks of uniform type that can
425436
be concatenated directly from their block values (formerly done via Block.concat_same_type) instead of the generic
426437
_concatenate_join_units (which uses `concat_compat`).
427438
428439
"""
440+
# TODO: require dtype match in addition to same type? e.g. DatetimeTZBlock
441+
# cannot necessarily join
429442
return (
430443
# all blocks need to have the same type
431444
all(type(ju.block) is type(join_units[0].block) for ju in join_units)

pandas/tests/extension/test_external_block.py

-6
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,6 @@ def df():
3232
return pd.DataFrame(block_manager)
3333

3434

35-
def test_concat_dataframe(df):
36-
# GH17728
37-
res = pd.concat([df, df])
38-
assert isinstance(res._mgr.blocks[1], CustomBlock)
39-
40-
4135
def test_concat_axis1(df):
4236
# GH17954
4337
df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]})

0 commit comments

Comments
 (0)