Skip to content

Change maybe_promote fill_value to dt64/td64 NaT instead of iNaT #28725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def maybe_promote(dtype, fill_value=np.nan):
# if we passed an array here, determine the fill value by dtype
if isinstance(fill_value, np.ndarray):
if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
fill_value = iNaT
fill_value = fill_value.dtype.type("NaT", "ns")
else:

# we need to change to object type as our
Expand All @@ -350,9 +350,14 @@ def maybe_promote(dtype, fill_value=np.nan):

# returns tuple of (dtype, fill_value)
if issubclass(dtype.type, np.datetime64):
fill_value = tslibs.Timestamp(fill_value).value
fill_value = tslibs.Timestamp(fill_value).to_datetime64()
elif issubclass(dtype.type, np.timedelta64):
fill_value = tslibs.Timedelta(fill_value).value
fv = tslibs.Timedelta(fill_value)
if fv is NaT:
# NaT has no `to_timedelta64` method
fill_value = np.timedelta64("NaT", "ns")
else:
fill_value = fv.to_timedelta64()
elif is_datetime64tz_dtype(dtype):
if isna(fill_value):
fill_value = NaT
Expand Down Expand Up @@ -393,7 +398,7 @@ def maybe_promote(dtype, fill_value=np.nan):
dtype = np.float64
fill_value = np.nan
elif is_datetime_or_timedelta_dtype(dtype):
fill_value = iNaT
fill_value = dtype.type("NaT", "ns")
else:
dtype = np.object_
fill_value = np.nan
Expand Down
46 changes: 14 additions & 32 deletions pandas/tests/dtypes/cast/test_promote.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
is_string_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype, PandasExtensionDtype
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna

import pandas as pd
Expand Down Expand Up @@ -92,20 +92,6 @@ def box(request):
return request.param


def _safe_dtype_assert(left_dtype, right_dtype):
"""
Compare two dtypes without raising TypeError.
"""
__tracebackhide__ = True
if isinstance(right_dtype, PandasExtensionDtype):
# switch order of equality check because numpy dtypes (e.g. if
# left_dtype is np.object_) do not know some expected dtypes (e.g.
# DatetimeTZDtype) and would raise a TypeError in their __eq__-method.
assert right_dtype == left_dtype
else:
assert left_dtype == right_dtype


def _check_promote(
dtype,
fill_value,
Expand Down Expand Up @@ -157,8 +143,11 @@ def _check_promote(
result_dtype, result_fill_value = maybe_promote(dtype, fill_value)
expected_fill_value = exp_val_for_scalar

_safe_dtype_assert(result_dtype, expected_dtype)
assert result_dtype == expected_dtype
_assert_match(result_fill_value, expected_fill_value)


def _assert_match(result_fill_value, expected_fill_value):
# GH#23982/25425 require the same type in addition to equality/NA-ness
res_type = type(result_fill_value)
ex_type = type(expected_fill_value)
Expand Down Expand Up @@ -369,8 +358,8 @@ def test_maybe_promote_any_with_datetime64(
if is_datetime64_dtype(dtype):
expected_dtype = dtype
# for datetime dtypes, scalar values get cast to np.datetime64 (via pd.Timestamp)
exp_val_for_scalar = pd.Timestamp(fill_value).value
exp_val_for_array = iNaT
exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64()
exp_val_for_array = np.datetime64("NaT", "ns")
else:
expected_dtype = np.dtype(object)
exp_val_for_scalar = fill_value
Expand Down Expand Up @@ -454,9 +443,7 @@ def test_maybe_promote_datetimetz_with_datetimetz(
)


@pytest.mark.parametrize(
"fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"]
)
@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT])
# override parametrization due to too many xfails; see GH 23982 / 25425
@pytest.mark.parametrize("box", [(False, None)])
def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box):
Expand Down Expand Up @@ -572,8 +559,8 @@ def test_maybe_promote_any_with_timedelta64(
if is_timedelta64_dtype(dtype):
expected_dtype = dtype
# for timedelta dtypes, scalar values get cast to np.timedelta64 (via pd.Timedelta)
exp_val_for_scalar = pd.Timedelta(fill_value).value
exp_val_for_array = iNaT
exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64()
exp_val_for_array = np.timedelta64("NaT", "ns")
else:
expected_dtype = np.dtype(object)
exp_val_for_scalar = fill_value
Expand Down Expand Up @@ -714,9 +701,7 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo
)


@pytest.mark.parametrize(
"fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"]
)
@pytest.mark.parametrize("fill_value", [None, np.nan, NaT, iNaT])
# override parametrization due to too many xfails; see GH 23982 / 25425
@pytest.mark.parametrize("box", [(False, None)])
def test_maybe_promote_any_numpy_dtype_with_na(
Expand Down Expand Up @@ -764,7 +749,7 @@ def test_maybe_promote_any_numpy_dtype_with_na(
elif is_datetime_or_timedelta_dtype(dtype):
# datetime / timedelta cast all missing values to the dt64/td64 NaT of that dtype
expected_dtype = dtype
exp_val_for_scalar = iNaT
exp_val_for_scalar = dtype.type("NaT", "ns")
elif fill_value is NaT:
# NaT upcasts everything that's not datetime/timedelta to object
expected_dtype = np.dtype(object)
Expand All @@ -783,7 +768,7 @@ def test_maybe_promote_any_numpy_dtype_with_na(
# integers cannot hold NaNs; maybe_promote_with_array returns None
exp_val_for_array = None
elif is_datetime_or_timedelta_dtype(expected_dtype):
exp_val_for_array = iNaT
exp_val_for_array = expected_dtype.type("NaT", "ns")
else: # expected_dtype = float / complex / object
exp_val_for_array = np.nan

Expand Down Expand Up @@ -817,7 +802,4 @@ def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim):
result_dtype, result_missing_value = maybe_promote(dtype, fill_array)

assert result_dtype == expected_dtype
# None == None, iNaT == iNaT, but np.nan != np.nan
assert (result_missing_value == expected_missing_value) or (
result_missing_value is np.nan and expected_missing_value is np.nan
)
_assert_match(result_missing_value, expected_missing_value)