9
9
10
10
import numpy as np
11
11
12
- from pandas ._libs import internals as libinternals
12
+ from pandas ._libs import (
13
+ NaT ,
14
+ internals as libinternals ,
15
+ )
13
16
from pandas ._typing import (
14
17
ArrayLike ,
15
18
DtypeObj ,
@@ -383,59 +386,21 @@ def is_na(self) -> bool:
383
386
return True
384
387
return False
385
388
386
- def get_reindexed_values (self , empty_dtype : DtypeObj , upcasted_na ) -> ArrayLike :
389
+ def get_reindexed_values (self , empty_dtype : DtypeObj ) -> ArrayLike :
387
390
values : ArrayLike
388
391
389
- if upcasted_na is None and not self .is_na :
390
- # No upcasting is necessary
391
- fill_value = self .block .fill_value
392
- values = self .block .get_values ()
392
+ if self .is_na :
393
+ return make_na_array (empty_dtype , self .shape )
394
+
393
395
else :
394
- fill_value = upcasted_na
395
-
396
- if self .is_na :
397
-
398
- if is_datetime64tz_dtype (empty_dtype ):
399
- i8values = np .full (self .shape , fill_value .value )
400
- return DatetimeArray (i8values , dtype = empty_dtype )
401
-
402
- elif is_1d_only_ea_dtype (empty_dtype ):
403
- empty_dtype = cast (ExtensionDtype , empty_dtype )
404
- cls = empty_dtype .construct_array_type ()
405
-
406
- missing_arr = cls ._from_sequence ([], dtype = empty_dtype )
407
- ncols , nrows = self .shape
408
- assert ncols == 1 , ncols
409
- empty_arr = - 1 * np .ones ((nrows ,), dtype = np .intp )
410
- return missing_arr .take (
411
- empty_arr , allow_fill = True , fill_value = fill_value
412
- )
413
- elif isinstance (empty_dtype , ExtensionDtype ):
414
- # TODO: no tests get here, a handful would if we disabled
415
- # the dt64tz special-case above (which is faster)
416
- cls = empty_dtype .construct_array_type ()
417
- missing_arr = cls ._empty (shape = self .shape , dtype = empty_dtype )
418
- missing_arr [:] = fill_value
419
- return missing_arr
420
- else :
421
- # NB: we should never get here with empty_dtype integer or bool;
422
- # if we did, the missing_arr.fill would cast to gibberish
423
- missing_arr = np .empty (self .shape , dtype = empty_dtype )
424
- missing_arr .fill (fill_value )
425
- return missing_arr
426
396
427
397
if (not self .indexers ) and (not self .block ._can_consolidate ):
428
398
# preserve these for validation in concat_compat
429
399
return self .block .values
430
400
431
- if self .block .is_bool :
432
- # External code requested filling/upcasting, bool values must
433
- # be upcasted to object to avoid being upcasted to numeric.
434
- values = self .block .astype (np .object_ ).values
435
- else :
436
- # No dtype upcasting is done here, it will be performed during
437
- # concatenation itself.
438
- values = self .block .values
401
+ # No dtype upcasting is done here, it will be performed during
402
+ # concatenation itself.
403
+ values = self .block .values
439
404
440
405
if not self .indexers :
441
406
# If there's no indexing to be done, we want to signal outside
@@ -450,6 +415,40 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
450
415
return values
451
416
452
417
418
def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
    """
    Build an array of the requested dtype and shape in which every entry
    is the missing-value marker for that dtype.

    Parameters
    ----------
    dtype : DtypeObj
        Target dtype of the result (numpy dtype or ExtensionDtype).
    shape : Shape
        Shape of the result. For 1D-only extension dtypes only the last
        entry is used, as the length of the returned array.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if is_datetime64tz_dtype(dtype):
        # NaT plays the role that dtype.na_value plays in the branches below;
        # filling the i8 representation directly is the fast path.
        nat_i8 = np.full(shape, NaT.value)
        return DatetimeArray(nat_i8, dtype=dtype)

    if is_1d_only_ea_dtype(dtype):
        ea_dtype = cast(ExtensionDtype, dtype)
        array_type = ea_dtype.construct_array_type()

        # Take from an empty array with every position set to -1 so that
        # each slot is filled with na_value.
        empty = array_type._from_sequence([], dtype=ea_dtype)
        length = shape[-1]
        all_missing = np.full((length,), -1, dtype=np.intp)
        return empty.take(
            all_missing, allow_fill=True, fill_value=ea_dtype.na_value
        )

    if isinstance(dtype, ExtensionDtype):
        # TODO: no tests get here, a handful would if we disabled
        #  the dt64tz special-case above (which is faster)
        array_type = dtype.construct_array_type()
        result = array_type._empty(shape=shape, dtype=dtype)
        result[:] = dtype.na_value
        return result

    # NB: we should never get here with dtype integer or bool;
    #  if we did, the fill below would cast to gibberish
    result = np.empty(shape, dtype=dtype)
    result.fill(_dtype_to_na_value(dtype))
    return result
450
+
451
+
453
452
def _concatenate_join_units (
454
453
join_units : list [JoinUnit ], concat_axis : int , copy : bool
455
454
) -> ArrayLike :
@@ -462,12 +461,7 @@ def _concatenate_join_units(
462
461
463
462
empty_dtype = _get_empty_dtype (join_units )
464
463
465
- upcasted_na = _dtype_to_na_value (empty_dtype )
466
-
467
- to_concat = [
468
- ju .get_reindexed_values (empty_dtype = empty_dtype , upcasted_na = upcasted_na )
469
- for ju in join_units
470
- ]
464
+ to_concat = [ju .get_reindexed_values (empty_dtype = empty_dtype ) for ju in join_units ]
471
465
472
466
if len (to_concat ) == 1 :
473
467
# Only one block, nothing to concatenate.
0 commit comments