29
29
is_datetime64tz_dtype ,
30
30
is_dtype_equal ,
31
31
is_extension_array_dtype ,
32
- is_sparse ,
33
32
)
34
33
from pandas .core .dtypes .concat import (
35
34
cast_to_common_type ,
46
45
DatetimeArray ,
47
46
ExtensionArray ,
48
47
)
48
+ from pandas .core .arrays .sparse import SparseDtype
49
49
from pandas .core .construction import ensure_wrapped_if_datetimelike
50
50
from pandas .core .internals .array_manager import (
51
51
ArrayManager ,
@@ -260,7 +260,10 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
260
260
mgr_shape_list [ax ] = len (indexer )
261
261
mgr_shape = tuple (mgr_shape_list )
262
262
263
+ has_column_indexer = False
264
+
263
265
if 0 in indexers :
266
+ has_column_indexer = True
264
267
ax0_indexer = indexers .pop (0 )
265
268
blknos = algos .take_nd (mgr .blknos , ax0_indexer , fill_value = - 1 )
266
269
blklocs = algos .take_nd (mgr .blklocs , ax0_indexer , fill_value = - 1 )
@@ -270,9 +273,6 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
270
273
blk = mgr .blocks [0 ]
271
274
return [(blk .mgr_locs , JoinUnit (blk , mgr_shape , indexers ))]
272
275
273
- # error: Incompatible types in assignment (expression has type "None", variable
274
- # has type "ndarray")
275
- ax0_indexer = None # type: ignore[assignment]
276
276
blknos = mgr .blknos
277
277
blklocs = mgr .blklocs
278
278
@@ -288,6 +288,7 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
288
288
shape = tuple (shape_list )
289
289
290
290
if blkno == - 1 :
291
+ # only reachable in the `0 in indexers` case
291
292
unit = JoinUnit (None , shape )
292
293
else :
293
294
blk = mgr .blocks [blkno ]
@@ -302,7 +303,7 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
302
303
# placement was sequential before.
303
304
(
304
305
(
305
- ax0_indexer is None
306
+ not has_column_indexer
306
307
and blk .mgr_locs .is_slice_like
307
308
and blk .mgr_locs .as_slice .step == 1
308
309
)
@@ -330,6 +331,7 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
330
331
class JoinUnit :
331
332
def __init__ (self , block , shape : Shape , indexers = None ):
332
333
# Passing shape explicitly is required for cases when block is None.
334
+ # Note: block is None implies indexers is None, but not vice-versa
333
335
if indexers is None :
334
336
indexers = {}
335
337
self .block = block
@@ -358,7 +360,7 @@ def dtype(self):
358
360
return blk .dtype
359
361
return ensure_dtype_can_hold_na (blk .dtype )
360
362
361
- def is_valid_na_for (self , dtype : DtypeObj ) -> bool :
363
+ def _is_valid_na_for (self , dtype : DtypeObj ) -> bool :
362
364
"""
363
365
Check that we are all-NA of a type/dtype that is compatible with this dtype.
364
366
Augments `self.is_na` with an additional check of the type of NA values.
@@ -389,11 +391,8 @@ def is_na(self) -> bool:
389
391
if not self .block ._can_hold_na :
390
392
return False
391
393
392
- # Usually it's enough to check but a small fraction of values to see if
393
- # a block is NOT null, chunks should help in such cases. 1000 value
394
- # was chosen rather arbitrarily.
395
394
values = self .block .values
396
- if is_sparse (self .block .values .dtype ):
395
+ if isinstance (self .block .values .dtype , SparseDtype ):
397
396
return False
398
397
elif self .block .is_extension :
399
398
# TODO(EA2D): no need for special case with 2D EAs
@@ -411,7 +410,8 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
411
410
else :
412
411
fill_value = upcasted_na
413
412
414
- if self .is_valid_na_for (empty_dtype ):
413
+ if self ._is_valid_na_for (empty_dtype ):
414
+ # note: always holds when self.block is None
415
415
blk_dtype = getattr (self .block , "dtype" , None )
416
416
417
417
if blk_dtype == np .dtype ("object" ):
@@ -592,13 +592,16 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
592
592
_concatenate_join_units (which uses `concat_compat`).
593
593
594
594
"""
595
+ first = join_units [0 ].block
596
+ if first is None :
597
+ return False
595
598
return (
596
- # all blocks need to have the same type
597
- all (type (ju .block ) is type (join_units [ 0 ]. block ) for ju in join_units ) # noqa
599
+ # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
600
+ all (type (ju .block ) is type (first ) for ju in join_units )
598
601
and
599
602
# e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
600
603
all (
601
- is_dtype_equal (ju .block .dtype , join_units [ 0 ]. block .dtype )
604
+ is_dtype_equal (ju .block .dtype , first .dtype )
602
605
# GH#42092 we only want the dtype_equal check for non-numeric blocks
603
606
# (for now, may change but that would need a deprecation)
604
607
or ju .block .dtype .kind in ["b" , "i" , "u" ]
0 commit comments