Skip to content

Commit 6e811d7

Browse files
jbrockmendel authored and feefladder committed
BUG: JoinUnit._is_valid_na_for (pandas-dev#43043)
1 parent 68c666c commit 6e811d7

File tree

2 files changed

+30
-11
lines changed

2 files changed

+30
-11
lines changed

pandas/core/internals/concat.py

+24 -11
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,11 @@
1010

1111
import numpy as np
1212

13-
from pandas._libs import internals as libinternals
13+
from pandas._libs import (
14+
NaT,
15+
internals as libinternals,
16+
)
17+
from pandas._libs.missing import NA
1418
from pandas._typing import (
1519
ArrayLike,
1620
DtypeObj,
@@ -28,7 +32,7 @@
2832
is_1d_only_ea_obj,
2933
is_datetime64tz_dtype,
3034
is_dtype_equal,
31-
is_extension_array_dtype,
35+
needs_i8_conversion,
3236
)
3337
from pandas.core.dtypes.concat import (
3438
cast_to_common_type,
@@ -374,13 +378,20 @@ def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
374378
values = self.block.values
375379
return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))
376380

377-
if self.dtype.kind == dtype.kind == "M" and not is_dtype_equal(
378-
self.dtype, dtype
379-
):
381+
na_value = self.block.fill_value
382+
if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
383+
# e.g. we are dt64 and other is td64
380384
# fill_values match but we should not cast self.block.values to dtype
385+
# TODO: this will need updating if we ever have non-nano dt64/td64
381386
return False
382387

383-
na_value = self.block.fill_value
388+
if na_value is NA and needs_i8_conversion(dtype):
389+
# FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
390+
# e.g. self.dtype == "Int64" and dtype is td64, we don't want
391+
# to consider these as matching
392+
return False
393+
394+
# TODO: better to use can_hold_element?
384395
return is_valid_na_for_dtype(na_value, dtype)
385396

386397
@cache_readonly
@@ -426,9 +437,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
426437
i8values = np.full(self.shape, fill_value.value)
427438
return DatetimeArray(i8values, dtype=empty_dtype)
428439

429-
elif is_extension_array_dtype(blk_dtype):
430-
pass
431-
432440
elif is_1d_only_ea_dtype(empty_dtype):
433441
empty_dtype = cast(ExtensionDtype, empty_dtype)
434442
cls = empty_dtype.construct_array_type()
@@ -440,11 +448,16 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
440448
return missing_arr.take(
441449
empty_arr, allow_fill=True, fill_value=fill_value
442450
)
451+
elif isinstance(empty_dtype, ExtensionDtype):
452+
# TODO: no tests get here, a handful would if we disabled
453+
# the dt64tz special-case above (which is faster)
454+
cls = empty_dtype.construct_array_type()
455+
missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
456+
missing_arr[:] = fill_value
457+
return missing_arr
443458
else:
444459
# NB: we should never get here with empty_dtype integer or bool;
445460
# if we did, the missing_arr.fill would cast to gibberish
446-
empty_dtype = cast(np.dtype, empty_dtype)
447-
448461
missing_arr = np.empty(self.shape, dtype=empty_dtype)
449462
missing_arr.fill(fill_value)
450463
return missing_arr

pandas/tests/reshape/concat/test_append.py

+6
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,12 @@ def test_append_empty_tz_frame_with_datetime64ns(self):
348348
expected = DataFrame({"a": [pd.NaT]}).astype(object)
349349
tm.assert_frame_equal(result, expected)
350350

351+
# mismatched tz
352+
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
353+
result = df.append(other, ignore_index=True)
354+
expected = DataFrame({"a": [pd.NaT]}).astype(object)
355+
tm.assert_frame_equal(result, expected)
356+
351357
@pytest.mark.parametrize(
352358
"dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
353359
)

0 commit comments

Comments
 (0)