@@ -102,6 +102,7 @@ class Block(PandasObject):
102
102
_validate_ndim = True
103
103
_ftype = 'dense'
104
104
_holder = None
105
+ _concatenator = staticmethod (np .concatenate )
105
106
106
107
def __init__ (self , values , placement , ndim = None , fastpath = False ):
107
108
if ndim is None :
@@ -314,6 +315,15 @@ def ftype(self):
314
315
def merge(self, other):
    """
    Merge this block with ``other``.

    Delegates to ``_merge_blocks``, passing the two blocks as a list
    with ``self`` first and ``other`` second.
    """
    pair = [self, other]
    return _merge_blocks(pair)
316
317
318
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of blocks
        Blocks whose ``values`` are concatenated, in order, with
        ``self._concatenator`` along axis ``self.ndim - 1``.
    placement : optional
        Placement handed to ``make_block_same_class``. When None,
        ``slice(0, len(values), 1)`` over the concatenated values is
        used instead.

    Returns
    -------
    The result of ``self.make_block_same_class`` for the concatenated
    values.
    """
    values = self._concatenator([blk.values for blk in to_concat],
                                axis=self.ndim - 1)
    # Test explicitly for None rather than relying on truthiness:
    # ``placement or ...`` raises for multi-element arrays and silently
    # discards falsy-but-valid placements (e.g. an empty placement or a
    # one-element array holding 0).
    if placement is None:
        placement = slice(0, len(values), 1)
    return self.make_block_same_class(values, placement=placement)
326
+
317
327
def reindex_axis (self , indexer , method = None , axis = 1 , fill_value = None ,
318
328
limit = None , mask_info = None ):
319
329
"""
@@ -2309,6 +2319,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock):
2309
2319
_verify_integrity = True
2310
2320
_can_hold_na = True
2311
2321
_holder = Categorical
2322
+ _concatenator = staticmethod (_concat ._concat_categorical )
2312
2323
2313
2324
def __init__ (self , values , placement , fastpath = False , ** kwargs ):
2314
2325
@@ -2432,6 +2443,17 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs):
2432
2443
# we are expected to return a 2-d ndarray
2433
2444
return values .reshape (1 , len (values ))
2434
2445
2446
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of blocks
        Blocks whose ``values`` are concatenated, in order, with
        ``self._concatenator`` along axis ``self.ndim - 1``.
    placement : optional
        Placement handed to ``make_block``. When None,
        ``slice(0, len(values), 1)`` over the concatenated values is
        used instead.

    Returns
    -------
    The block built by ``make_block`` with ``ndim=self.ndim``.
    """
    values = self._concatenator([blk.values for blk in to_concat],
                                axis=self.ndim - 1)
    # Test explicitly for None rather than relying on truthiness:
    # ``placement or ...`` raises for multi-element arrays and silently
    # discards falsy-but-valid placements.
    if placement is None:
        placement = slice(0, len(values), 1)
    # not using self.make_block_same_class as values can be object dtype
    return make_block(values, placement=placement, ndim=self.ndim)
2456
+
2435
2457
2436
2458
class DatetimeBlock (DatetimeLikeBlockMixin , Block ):
2437
2459
__slots__ = ()
@@ -2571,6 +2593,7 @@ class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock):
2571
2593
""" implement a datetime64 block with a tz attribute """
2572
2594
__slots__ = ()
2573
2595
_holder = DatetimeIndex
2596
+ _concatenator = staticmethod (_concat ._concat_datetime )
2574
2597
is_datetimetz = True
2575
2598
2576
2599
def __init__ (self , values , placement , ndim = 2 , ** kwargs ):
@@ -2711,6 +2734,16 @@ def shift(self, periods, axis=0, mgr=None):
2711
2734
return [self .make_block_same_class (new_values ,
2712
2735
placement = self .mgr_locs )]
2713
2736
2737
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of blocks
        Blocks whose ``values`` are concatenated, in order, with
        ``self._concatenator`` along axis ``self.ndim - 1``.
    placement : optional
        Placement handed to ``make_block``. When None,
        ``slice(0, len(values), 1)`` over the concatenated values is
        used instead.

    Returns
    -------
    The block built by ``make_block`` for the concatenated values.
    """
    values = self._concatenator([blk.values for blk in to_concat],
                                axis=self.ndim - 1)
    # Test explicitly for None rather than relying on truthiness:
    # ``placement or ...`` raises for multi-element arrays and silently
    # discards falsy-but-valid placements.
    if placement is None:
        placement = slice(0, len(values), 1)
    # not using self.make_block_same_class as values can be non-tz dtype
    return make_block(values, placement=placement)
2746
+
2714
2747
2715
2748
class SparseBlock (NonConsolidatableMixIn , Block ):
2716
2749
""" implement as a list of sparse arrays of the same dtype """
@@ -2721,6 +2754,7 @@ class SparseBlock(NonConsolidatableMixIn, Block):
2721
2754
_can_hold_na = True
2722
2755
_ftype = 'sparse'
2723
2756
_holder = SparseArray
2757
+ _concatenator = staticmethod (_concat ._concat_sparse )
2724
2758
2725
2759
@property
2726
2760
def shape (self ):
@@ -4517,6 +4551,45 @@ def fast_xs(self, loc):
4517
4551
"""
4518
4552
return self ._block .values [loc ]
4519
4553
4554
def concat(self, to_concat, new_axis):
    """
    Combine several SingleBlockManagers into one SingleBlockManager.

    Used for pd.concat of Series objects with axis=0.

    Parameters
    ----------
    to_concat : list of SingleBlockManagers
    new_axis : Index of the result

    Returns
    -------
    SingleBlockManager

    """
    # empty managers are ignored when deciding how to concatenate
    populated = [mgr for mgr in to_concat if len(mgr) > 0]
    candidates = [mgr.blocks[0] for mgr in populated]

    # fast path: every non-empty manager holds the same block type
    uniform = bool(candidates) and all(
        type(blk) is type(candidates[0]) for blk in candidates[1:])
    if uniform:
        merged = candidates[0].concat_same_type(candidates)
        return SingleBlockManager(merged, new_axis)

    # generic path: mixed block types, or nothing but empty managers
    if candidates:
        arrays = [blk.values for blk in candidates]
    else:
        arrays = [mgr._block.values for mgr in to_concat]
    combined = _concat._concat_compat(arrays)
    merged = make_block(combined, placement=slice(0, len(combined), 1))
    return SingleBlockManager(merged, new_axis)
4592
+
4520
4593
4521
4594
def construction_error (tot_items , block_shape , axes , e = None ):
4522
4595
""" raise a helpful message about our construction """
@@ -5105,13 +5178,42 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
5105
5178
[get_mgr_concatenation_plan (mgr , indexers )
5106
5179
for mgr , indexers in mgrs_indexers ], concat_axis )
5107
5180
5108
- blocks = [make_block (
5109
- concatenate_join_units (join_units , concat_axis , copy = copy ),
5110
- placement = placement ) for placement , join_units in concat_plan ]
5181
+ blocks = []
5182
+
5183
+ for placement , join_units in concat_plan :
5184
+
5185
+ if is_uniform_join_units (join_units ):
5186
+ b = join_units [0 ].block .concat_same_type (
5187
+ [ju .block for ju in join_units ], placement = placement )
5188
+ else :
5189
+ b = make_block (
5190
+ concatenate_join_units (join_units , concat_axis , copy = copy ),
5191
+ placement = placement )
5192
+ blocks .append (b )
5111
5193
5112
5194
return BlockManager (blocks , axes )
5113
5195
5114
5196
5197
def is_uniform_join_units(join_units):
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    concatenate_join_units (which uses `_concat._concat_compat`).

    Parameters
    ----------
    join_units : list of join units
        Each unit is read through its ``block``, ``is_na`` and
        ``indexers`` attributes.

    Returns
    -------
    bool
        True only when there is more than one unit and every check
        below holds for all of them.
    """
    # only use this path when there is something to concatenate; this
    # is the cheapest check, so do it before touching any unit
    if len(join_units) <= 1:
        return False

    first_type = type(join_units[0].block)
    # generator expressions let ``all`` stop at the first failing unit
    # instead of evaluating the attribute on every unit up front
    return (
        # all blocks need to have the same type
        all(type(ju.block) is first_type for ju in join_units) and
        # no blocks that would get missing values (can lead to type upcasts)
        all(not ju.is_na for ju in join_units) and
        # no blocks with indexers (as then the dimensions do not fit)
        all(not ju.indexers for ju in join_units) and
        # disregard Panels
        all(ju.block.ndim <= 2 for ju in join_units))
5215
+
5216
+
5115
5217
def get_empty_dtype_and_na (join_units ):
5116
5218
"""
5117
5219
Return dtype and N/A values to use when concatenating specified units.
0 commit comments