9
9
10
10
import numpy as np
11
11
12
- from pandas ._libs import (
13
- NaT ,
14
- internals as libinternals ,
15
- )
12
+ from pandas ._libs import internals as libinternals
16
13
from pandas ._typing import (
17
14
ArrayLike ,
18
15
DtypeObj ,
@@ -391,21 +388,59 @@ def is_na(self) -> bool:
391
388
return True
392
389
return False
393
390
394
- def get_reindexed_values (self , empty_dtype : DtypeObj ) -> ArrayLike :
391
+ def get_reindexed_values (self , empty_dtype : DtypeObj , upcasted_na ) -> ArrayLike :
395
392
values : ArrayLike
396
393
397
- if self .is_na :
398
- return make_na_array (empty_dtype , self .shape )
399
-
394
+ if upcasted_na is None and not self .is_na :
395
+ # No upcasting is necessary
396
+ fill_value = self .block .fill_value
397
+ values = self .block .get_values ()
400
398
else :
399
+ fill_value = upcasted_na
400
+
401
+ if self .is_na :
402
+
403
+ if is_datetime64tz_dtype (empty_dtype ):
404
+ i8values = np .full (self .shape , fill_value .value )
405
+ return DatetimeArray (i8values , dtype = empty_dtype )
406
+
407
+ elif is_1d_only_ea_dtype (empty_dtype ):
408
+ empty_dtype = cast (ExtensionDtype , empty_dtype )
409
+ cls = empty_dtype .construct_array_type ()
410
+
411
+ missing_arr = cls ._from_sequence ([], dtype = empty_dtype )
412
+ ncols , nrows = self .shape
413
+ assert ncols == 1 , ncols
414
+ empty_arr = - 1 * np .ones ((nrows ,), dtype = np .intp )
415
+ return missing_arr .take (
416
+ empty_arr , allow_fill = True , fill_value = fill_value
417
+ )
418
+ elif isinstance (empty_dtype , ExtensionDtype ):
419
+ # TODO: no tests get here, a handful would if we disabled
420
+ # the dt64tz special-case above (which is faster)
421
+ cls = empty_dtype .construct_array_type ()
422
+ missing_arr = cls ._empty (shape = self .shape , dtype = empty_dtype )
423
+ missing_arr [:] = fill_value
424
+ return missing_arr
425
+ else :
426
+ # NB: we should never get here with empty_dtype integer or bool;
427
+ # if we did, the missing_arr.fill would cast to gibberish
428
+ missing_arr = np .empty (self .shape , dtype = empty_dtype )
429
+ missing_arr .fill (fill_value )
430
+ return missing_arr
401
431
402
432
if (not self .indexers ) and (not self .block ._can_consolidate ):
403
433
# preserve these for validation in concat_compat
404
434
return self .block .values
405
435
406
- # No dtype upcasting is done here, it will be performed during
407
- # concatenation itself.
408
- values = self .block .values
436
+ if self .block .is_bool :
437
+ # External code requested filling/upcasting, bool values must
438
+ # be upcasted to object to avoid being upcasted to numeric.
439
+ values = self .block .astype (np .object_ ).values
440
+ else :
441
+ # No dtype upcasting is done here, it will be performed during
442
+ # concatenation itself.
443
+ values = self .block .values
409
444
410
445
if not self .indexers :
411
446
# If there's no indexing to be done, we want to signal outside
@@ -420,40 +455,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
420
455
return values
421
456
422
457
423
- def make_na_array (dtype : DtypeObj , shape : Shape ) -> ArrayLike :
424
- """
425
- Construct an np.ndarray or ExtensionArray of the given dtype and shape
426
- holding all-NA values.
427
- """
428
- if is_datetime64tz_dtype (dtype ):
429
- # NaT here is analogous to dtype.na_value below
430
- i8values = np .full (shape , NaT .value )
431
- return DatetimeArray (i8values , dtype = dtype )
432
-
433
- elif is_1d_only_ea_dtype (dtype ):
434
- dtype = cast (ExtensionDtype , dtype )
435
- cls = dtype .construct_array_type ()
436
-
437
- missing_arr = cls ._from_sequence ([], dtype = dtype )
438
- nrows = shape [- 1 ]
439
- taker = - 1 * np .ones ((nrows ,), dtype = np .intp )
440
- return missing_arr .take (taker , allow_fill = True , fill_value = dtype .na_value )
441
- elif isinstance (dtype , ExtensionDtype ):
442
- # TODO: no tests get here, a handful would if we disabled
443
- # the dt64tz special-case above (which is faster)
444
- cls = dtype .construct_array_type ()
445
- missing_arr = cls ._empty (shape = shape , dtype = dtype )
446
- missing_arr [:] = dtype .na_value
447
- return missing_arr
448
- else :
449
- # NB: we should never get here with dtype integer or bool;
450
- # if we did, the missing_arr.fill would cast to gibberish
451
- missing_arr = np .empty (shape , dtype = dtype )
452
- fill_value = _dtype_to_na_value (dtype )
453
- missing_arr .fill (fill_value )
454
- return missing_arr
455
-
456
-
457
458
def _concatenate_join_units (
458
459
join_units : list [JoinUnit ], concat_axis : int , copy : bool
459
460
) -> ArrayLike :
@@ -466,7 +467,12 @@ def _concatenate_join_units(
466
467
467
468
empty_dtype = _get_empty_dtype (join_units )
468
469
469
- to_concat = [ju .get_reindexed_values (empty_dtype = empty_dtype ) for ju in join_units ]
470
+ upcasted_na = _dtype_to_na_value (empty_dtype )
471
+
472
+ to_concat = [
473
+ ju .get_reindexed_values (empty_dtype = empty_dtype , upcasted_na = upcasted_na )
474
+ for ju in join_units
475
+ ]
470
476
471
477
if len (to_concat ) == 1 :
472
478
# Only one block, nothing to concatenate.
0 commit comments