Skip to content

Commit 1c35aca

Browse files
use Block.concat_same_type in concatenate_block_managers (concatting DataFrames)
1 parent 7676f03 commit 1c35aca

File tree

2 files changed

+70
-17
lines changed

2 files changed

+70
-17
lines changed

pandas/core/internals.py

+44-14
Original file line numberDiff line numberDiff line change
@@ -312,13 +312,14 @@ def ftype(self):
312312
def merge(self, other):
    """
    Merge this block with ``other``.

    The pair ``[self, other]`` is handed to ``_merge_blocks`` and its
    result is returned unchanged.
    """
    pair = [self, other]
    return _merge_blocks(pair)
314314

315-
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of blocks
        Blocks whose ``values`` are joined with ``np.concatenate`` along
        the last axis (``self.ndim - 1``).
    placement : optional
        Placement handed to ``make_block_same_class``. When None, it
        defaults to ``slice(0, len(values), 1)``.

    Returns
    -------
    The result of ``self.make_block_same_class`` for the joined values.
    """
    values = np.concatenate([blk.values for blk in to_concat],
                            axis=self.ndim - 1)
    # Compare against None explicitly: ``placement or ...`` would silently
    # replace any falsy placement (e.g. an empty sequence) with the default
    # and raises ValueError when truth-testing a multi-element ndarray.
    if placement is None:
        placement = slice(0, len(values), 1)
    return self.make_block_same_class(values, placement=placement)
322323

323324
def reindex_axis(self, indexer, method=None, axis=1, fill_value=None,
324325
limit=None, mask_info=None):
@@ -2415,7 +2416,7 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
24152416
# we are expected to return a 2-d ndarray
24162417
return values.reshape(1, len(values))
24172418

2418-
def concat_same_type(self, to_concat):
2419+
def concat_same_type(self, to_concat, placement=None):
24192420
"""
24202421
Concatenate list of single blocks of the same type.
24212422
"""
@@ -2424,9 +2425,9 @@ def concat_same_type(self, to_concat):
24242425

24252426
if is_categorical_dtype(values.dtype):
24262427
return self.make_block_same_class(
2427-
values, placement=slice(0, len(values), 1))
2428+
values, placement=placement or slice(0, len(values), 1))
24282429
else:
2429-
return make_block(values, placement=slice(0, len(values), 1))
2430+
return make_block(values, placement=placement or slice(0, len(values), 1))
24302431

24312432

24322433
class DatetimeBlock(DatetimeLikeBlockMixin, Block):
@@ -2705,7 +2706,7 @@ def shift(self, periods, axis=0, mgr=None):
27052706
return [self.make_block_same_class(new_values,
27062707
placement=self.mgr_locs)]
27072708

2708-
def concat_same_type(self, to_concat):
2709+
def concat_same_type(self, to_concat, placement=None):
27092710
"""
27102711
Concatenate list of single blocks of the same type.
27112712
"""
@@ -2714,9 +2715,9 @@ def concat_same_type(self, to_concat):
27142715

27152716
if is_datetimetz(values):
27162717
return self.make_block_same_class(
2717-
values, placement=slice(0, len(values), 1))
2718+
values, placement=placement or slice(0, len(values), 1))
27182719
else:
2719-
return make_block(values, placement=slice(0, len(values), 1))
2720+
return make_block(values, placement=placement or slice(0, len(values), 1))
27202721

27212722

27222723
class SparseBlock(NonConsolidatableMixIn, Block):
@@ -2885,15 +2886,15 @@ def sparse_reindex(self, new_index):
28852886
return self.make_block_same_class(values, sparse_index=new_index,
28862887
placement=self.mgr_locs)
28872888

2888-
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of blocks
        Blocks whose ``values`` are joined via ``_concat._concat_sparse``.
    placement : optional
        Placement handed to ``make_block_same_class``. When None, it
        defaults to ``slice(0, len(values), 1)``.

    Returns
    -------
    The result of ``self.make_block_same_class`` for the joined values.
    """
    values = _concat._concat_sparse([blk.values for blk in to_concat])

    # Compare against None explicitly: ``placement or ...`` would silently
    # replace any falsy placement (e.g. an empty sequence) with the default
    # and raises ValueError when truth-testing a multi-element ndarray.
    if placement is None:
        placement = slice(0, len(values), 1)
    return self.make_block_same_class(values, placement=placement)
28972898

28982899

28992900
def make_block(values, placement, klass=None, ndim=None, dtype=None,
@@ -5146,13 +5147,42 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
51465147
[get_mgr_concatenation_plan(mgr, indexers)
51475148
for mgr, indexers in mgrs_indexers], concat_axis)
51485149

5149-
blocks = [make_block(
5150-
concatenate_join_units(join_units, concat_axis, copy=copy),
5151-
placement=placement) for placement, join_units in concat_plan]
5150+
blocks = []
5151+
5152+
for placement, join_units in concat_plan:
5153+
5154+
if is_uniform_join_units(join_units):
5155+
b = join_units[0].block.concat_same_type(
5156+
[ju.block for ju in join_units], placement=placement)
5157+
else:
5158+
b = make_block(
5159+
concatenate_join_units(join_units, concat_axis, copy=copy),
5160+
placement=placement)
5161+
blocks.append(b)
51525162

51535163
return BlockManager(blocks, axes)
51545164

51555165

5166+
def is_uniform_join_units(join_units):
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    concatenate_join_units (which uses `_concat._concat_compat`).

    Parameters
    ----------
    join_units : sequence
        Join units; each one is read for its ``block``, ``is_na`` and
        ``indexers`` attributes.

    Returns
    -------
    bool
        True only when there is more than one unit and every unit passes
        all of the per-unit checks below.
    """
    # only use this path when there is something to concatenate; checking
    # this first also guarantees join_units[0] exists
    if len(join_units) <= 1:
        return False

    first_type = type(join_units[0].block)
    # a single generator lets all() stop at the first failing unit, so the
    # remaining attributes of later units are never evaluated
    return all(
        # all blocks need to have the same type
        type(ju.block) is first_type
        # no blocks that would get missing values (can lead to type upcasts)
        and not ju.is_na
        # no blocks with indexers (as then the dimensions do not fit)
        and not ju.indexers
        # disregard Panels
        and ju.block.ndim <= 2
        for ju in join_units)
5184+
5185+
51565186
def get_empty_dtype_and_na(join_units):
51575187
"""
51585188
Return dtype and N/A values to use when concatenating specified units.

pandas/tests/internals/test_external_block.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,25 @@
44
import numpy as np
55

66
import pandas as pd
7-
from pandas.core.internals import Block, BlockManager, SingleBlockManager
7+
from pandas.core.internals import Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn
88

99

10-
class CustomBlock(NonConsolidatableMixIn, Block):
    """
    Externally defined Block subclass exercised by the tests in this file.

    Its values are kept 1D (see ``concat_same_type``), and ``_holder`` is
    set to ``np.ndarray``.
    """

    _holder = np.ndarray

    def formatting_values(self):
        """Return the values rendered as ``"Val: <value>"`` strings."""
        return np.array(["Val: {}".format(i) for i in self.values])

    def concat_same_type(self, to_concat, placement=None):
        """
        Always concatenate disregarding self.ndim as the values are
        always 1D in this custom Block

        ``placement`` is passed through to ``make_block_same_class``; when
        it is None, ``slice(0, len(values), 1)`` is used instead.
        """
        values = np.concatenate([blk.values for blk in to_concat])
        # Compare against None explicitly: ``placement or ...`` would
        # replace any falsy placement with the default and raises
        # ValueError when truth-testing a multi-element ndarray.
        if placement is None:
            placement = slice(0, len(values), 1)
        return self.make_block_same_class(values, placement=placement)
25+
1526

1627
def test_custom_repr():
1728
values = np.arange(3, dtype='int64')
@@ -23,7 +34,7 @@ def test_custom_repr():
2334
assert repr(s) == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64'
2435

2536
# dataframe
26-
block = CustomBlock(values.reshape(1, -1), placement=slice(0, 1))
37+
block = CustomBlock(values, placement=slice(0, 1))
2738
blk_mgr = BlockManager([block], [['col'], range(3)])
2839
df = pd.DataFrame(blk_mgr)
2940
assert repr(df) == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2'
@@ -36,3 +47,15 @@ def test_concat_series():
3647

3748
res = pd.concat([s, s])
3849
assert isinstance(res._data.blocks[0], CustomBlock)
50+
51+
52+
def test_concat_dataframe():
    """A CustomBlock held by a DataFrame is still a CustomBlock after concat."""
    frame = pd.DataFrame({'a': [1, 2, 3]})

    # append a custom block as a second column next to the regular one
    extra = CustomBlock(np.arange(3, dtype='int64'), placement=slice(1, 2))
    all_blocks = frame._data.blocks + (extra, )
    manager = BlockManager(all_blocks, [pd.Index(['a', 'b']), frame.index])

    wrapped = pd.DataFrame(manager)
    combined = pd.concat([wrapped, wrapped])
    assert isinstance(combined._data.blocks[1], CustomBlock)

0 commit comments

Comments
 (0)