From ca143507947962f10524840f367d2df9b6205f63 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Oct 2019 09:44:46 -0500 Subject: [PATCH] Change maybe_promote fill_value to dt64/td64 NaT instead of iNaT --- pandas/core/dtypes/cast.py | 13 ++++--- pandas/tests/dtypes/cast/test_promote.py | 46 ++++++++---------------- 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a3ad84ff89a66..08176af2b326d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -339,7 +339,7 @@ def maybe_promote(dtype, fill_value=np.nan): # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): - fill_value = iNaT + fill_value = fill_value.dtype.type("NaT", "ns") else: # we need to change to object type as our @@ -350,9 +350,14 @@ def maybe_promote(dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): - fill_value = tslibs.Timestamp(fill_value).value + fill_value = tslibs.Timestamp(fill_value).to_datetime64() elif issubclass(dtype.type, np.timedelta64): - fill_value = tslibs.Timedelta(fill_value).value + fv = tslibs.Timedelta(fill_value) + if fv is NaT: + # NaT has no `to_timedelta64` method + fill_value = np.timedelta64("NaT", "ns") + else: + fill_value = fv.to_timedelta64() elif is_datetime64tz_dtype(dtype): if isna(fill_value): fill_value = NaT @@ -393,7 +398,7 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.float64 fill_value = np.nan elif is_datetime_or_timedelta_dtype(dtype): - fill_value = iNaT + fill_value = dtype.type("NaT", "ns") else: dtype = np.object_ fill_value = np.nan diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 211c550100018..cf7a168074e9e 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ 
-22,7 +22,7 @@ is_string_dtype, is_timedelta64_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype, PandasExtensionDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna import pandas as pd @@ -92,20 +92,6 @@ def box(request): return request.param -def _safe_dtype_assert(left_dtype, right_dtype): - """ - Compare two dtypes without raising TypeError. - """ - __tracebackhide__ = True - if isinstance(right_dtype, PandasExtensionDtype): - # switch order of equality check because numpy dtypes (e.g. if - # left_dtype is np.object_) do not know some expected dtypes (e.g. - # DatetimeTZDtype) and would raise a TypeError in their __eq__-method. - assert right_dtype == left_dtype - else: - assert left_dtype == right_dtype - - def _check_promote( dtype, fill_value, @@ -157,8 +143,11 @@ def _check_promote( result_dtype, result_fill_value = maybe_promote(dtype, fill_value) expected_fill_value = exp_val_for_scalar - _safe_dtype_assert(result_dtype, expected_dtype) + assert result_dtype == expected_dtype + _assert_match(result_fill_value, expected_fill_value) + +def _assert_match(result_fill_value, expected_fill_value): # GH#23982/25425 require the same type in addition to equality/NA-ness res_type = type(result_fill_value) ex_type = type(expected_fill_value) @@ -369,8 +358,8 @@ def test_maybe_promote_any_with_datetime64( if is_datetime64_dtype(dtype): expected_dtype = dtype # for datetime dtypes, scalar values get cast to pd.Timestamp.value - exp_val_for_scalar = pd.Timestamp(fill_value).value - exp_val_for_array = iNaT + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + exp_val_for_array = np.datetime64("NaT", "ns") else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value @@ -454,9 +443,7 @@ def test_maybe_promote_datetimetz_with_datetimetz( ) -@pytest.mark.parametrize( - "fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"] -) 
+@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT]) # override parametrization due to to many xfails; see GH 23982 / 25425 @pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): @@ -572,8 +559,8 @@ def test_maybe_promote_any_with_timedelta64( if is_timedelta64_dtype(dtype): expected_dtype = dtype # for timedelta dtypes, scalar values get cast to pd.Timedelta.value - exp_val_for_scalar = pd.Timedelta(fill_value).value - exp_val_for_array = iNaT + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + exp_val_for_array = np.timedelta64("NaT", "ns") else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value @@ -714,9 +701,7 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo ) -@pytest.mark.parametrize( - "fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"] -) +@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT]) # override parametrization due to to many xfails; see GH 23982 / 25425 @pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_any_numpy_dtype_with_na( @@ -764,7 +749,7 @@ def test_maybe_promote_any_numpy_dtype_with_na( elif is_datetime_or_timedelta_dtype(dtype): # datetime / timedelta cast all missing values to iNaT expected_dtype = dtype - exp_val_for_scalar = iNaT + exp_val_for_scalar = dtype.type("NaT", "ns") elif fill_value is NaT: # NaT upcasts everything that's not datetime/timedelta to object expected_dtype = np.dtype(object) @@ -783,7 +768,7 @@ def test_maybe_promote_any_numpy_dtype_with_na( # integers cannot hold NaNs; maybe_promote_with_array returns None exp_val_for_array = None elif is_datetime_or_timedelta_dtype(expected_dtype): - exp_val_for_array = iNaT + exp_val_for_array = expected_dtype.type("NaT", "ns") else: # expected_dtype = float / complex / object exp_val_for_array = np.nan @@ -817,7 +802,4 @@ def 
test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim): result_dtype, result_missing_value = maybe_promote(dtype, fill_array) assert result_dtype == expected_dtype - # None == None, iNaT == iNaT, but np.nan != np.nan - assert (result_missing_value == expected_missing_value) or ( - result_missing_value is np.nan and expected_missing_value is np.nan - ) + _assert_match(result_missing_value, expected_missing_value)