10
10
11
11
import numpy as np
12
12
13
- from pandas ._libs import internals as libinternals
13
+ from pandas ._libs import (
14
+ NaT ,
15
+ internals as libinternals ,
16
+ )
17
+ from pandas ._libs .missing import NA
14
18
from pandas ._typing import (
15
19
ArrayLike ,
16
20
DtypeObj ,
27
31
is_1d_only_ea_dtype ,
28
32
is_datetime64tz_dtype ,
29
33
is_dtype_equal ,
34
+ needs_i8_conversion ,
30
35
)
31
36
from pandas .core .dtypes .concat import (
32
37
cast_to_common_type ,
33
38
concat_compat ,
34
39
)
35
40
from pandas .core .dtypes .dtypes import ExtensionDtype
41
+ from pandas .core .dtypes .missing import is_valid_na_for_dtype
36
42
37
43
import pandas .core .algorithms as algos
38
44
from pandas .core .arrays import (
@@ -378,6 +384,36 @@ def dtype(self):
378
384
return blk .dtype
379
385
return ensure_dtype_can_hold_na (blk .dtype )
380
386
387
+ def _is_valid_na_for (self , dtype : DtypeObj ) -> bool :
388
+ """
389
+ Check that we are all-NA of a type/dtype that is compatible with this dtype.
390
+ Augments `self.is_na` with an additional check of the type of NA values.
391
+ """
392
+ if not self .is_na :
393
+ return False
394
+ if self .block .dtype .kind == "V" :
395
+ return True
396
+
397
+ if self .dtype == object :
398
+ values = self .block .values
399
+ return all (is_valid_na_for_dtype (x , dtype ) for x in values .ravel (order = "K" ))
400
+
401
+ na_value = self .block .fill_value
402
+ if na_value is NaT and not is_dtype_equal (self .dtype , dtype ):
403
+ # e.g. we are dt64 and other is td64
404
+ # fill_values match but we should not cast self.block.values to dtype
405
+ # TODO: this will need updating if we ever have non-nano dt64/td64
406
+ return False
407
+
408
+ if na_value is NA and needs_i8_conversion (dtype ):
409
+ # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
410
+ # e.g. self.dtype == "Int64" and dtype is td64, we don't want
411
+ # to consider these as matching
412
+ return False
413
+
414
+ # TODO: better to use can_hold_element?
415
+ return is_valid_na_for_dtype (na_value , dtype )
416
+
381
417
@cache_readonly
382
418
def is_na (self ) -> bool :
383
419
blk = self .block
@@ -388,14 +424,24 @@ def is_na(self) -> bool:
388
424
def get_reindexed_values (self , empty_dtype : DtypeObj , upcasted_na ) -> ArrayLike :
389
425
values : ArrayLike
390
426
391
- if upcasted_na is None and not self .is_na :
427
+ if upcasted_na is None and self .block . dtype . kind != "V" :
392
428
# No upcasting is necessary
393
429
fill_value = self .block .fill_value
394
430
values = self .block .get_values ()
395
431
else :
396
432
fill_value = upcasted_na
397
433
398
- if self .is_na :
434
+ if self ._is_valid_na_for (empty_dtype ):
435
+ # note: always holds when self.block.dtype.kind == "V"
436
+ blk_dtype = self .block .dtype
437
+
438
+ if blk_dtype == np .dtype ("object" ):
439
+ # we want to avoid filling with np.nan if we are
440
+ # using None; we already know that we are all
441
+ # nulls
442
+ values = self .block .values .ravel (order = "K" )
443
+ if len (values ) and values [0 ] is None :
444
+ fill_value = None
399
445
400
446
if is_datetime64tz_dtype (empty_dtype ):
401
447
i8values = np .full (self .shape , fill_value .value )
@@ -464,7 +510,8 @@ def _concatenate_join_units(
464
510
465
511
empty_dtype = _get_empty_dtype (join_units )
466
512
467
- upcasted_na = _dtype_to_na_value (empty_dtype )
513
+ has_none_blocks = any (unit .block .dtype .kind == "V" for unit in join_units )
514
+ upcasted_na = _dtype_to_na_value (empty_dtype , has_none_blocks )
468
515
469
516
to_concat = [
470
517
ju .get_reindexed_values (empty_dtype = empty_dtype , upcasted_na = upcasted_na )
@@ -506,7 +553,7 @@ def _concatenate_join_units(
506
553
return concat_values
507
554
508
555
509
- def _dtype_to_na_value (dtype : DtypeObj ):
556
+ def _dtype_to_na_value (dtype : DtypeObj , has_none_blocks : bool ):
510
557
"""
511
558
Find the NA value to go with this dtype.
512
559
"""
@@ -544,9 +591,11 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
544
591
empty_dtype = join_units [0 ].block .dtype
545
592
return empty_dtype
546
593
547
- has_none_blocks = any (unit .is_na for unit in join_units )
594
+ has_none_blocks = any (unit .block . dtype . kind == "V" for unit in join_units )
548
595
549
596
dtypes = [unit .dtype for unit in join_units if not unit .is_na ]
597
+ if not len (dtypes ):
598
+ dtypes = [unit .dtype for unit in join_units if unit .block .dtype .kind != "V" ]
550
599
551
600
dtype = find_common_type (dtypes )
552
601
if has_none_blocks :
0 commit comments