Skip to content

ENH: maybe_convert_objects corner cases #41714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 2, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ from typing import (

import numpy as np

from pandas._typing import ArrayLike
from pandas._typing import (
ArrayLike,
DtypeObj,
)

# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray
Expand Down Expand Up @@ -73,6 +76,7 @@ def maybe_convert_objects(
convert_timedelta: bool = ...,
convert_period: Literal[False] = ...,
convert_to_nullable_integer: Literal[False] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> np.ndarray: ...

@overload
Expand All @@ -85,6 +89,7 @@ def maybe_convert_objects(
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: Literal[True] = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...

@overload
Expand All @@ -97,6 +102,7 @@ def maybe_convert_objects(
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...

@overload
Expand All @@ -109,6 +115,7 @@ def maybe_convert_objects(
convert_timedelta: bool = ...,
convert_period: Literal[True] = ...,
convert_to_nullable_integer: bool = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...

@overload
Expand All @@ -121,6 +128,7 @@ def maybe_convert_objects(
convert_timedelta: bool = ...,
convert_period: bool = ...,
convert_to_nullable_integer: bool = ...,
dtype_if_all_nat: DtypeObj | None = ...,
) -> ArrayLike: ...

@overload
Expand Down
64 changes: 55 additions & 9 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ from pandas._libs.util cimport (
)

from pandas._libs.tslib import array_to_datetime
from pandas._libs.tslibs import (
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
)
from pandas._libs.tslibs.period import Period

from pandas._libs.missing cimport (
Expand Down Expand Up @@ -1636,7 +1640,8 @@ def infer_datetimelike_array(arr: ndarray[object]) -> str:
# convert *every* string array
if len(objs):
try:
array_to_datetime(objs, errors="raise")
# require_iso8601 as in maybe_infer_to_datetimelike
array_to_datetime(objs, errors="raise", require_iso8601=True)
return "datetime"
except (ValueError, TypeError):
pass
Expand Down Expand Up @@ -2275,7 +2280,8 @@ def maybe_convert_objects(ndarray[object] objects,
bint convert_datetime=False,
bint convert_timedelta=False,
bint convert_period=False,
bint convert_to_nullable_integer=False) -> "ArrayLike":
bint convert_to_nullable_integer=False,
object dtype_if_all_nat=None) -> "ArrayLike":
"""
Type inference function-- convert object array to proper dtype

Expand All @@ -2301,6 +2307,8 @@ def maybe_convert_objects(ndarray[object] objects,
convert_to_nullable_integer : bool, default False
If an array-like object containing only integer values (and NaN) is
encountered, whether to convert and return an IntegerArray.
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
Dtype to cast to if we have all-NaT.

Returns
-------
Expand Down Expand Up @@ -2369,8 +2377,12 @@ def maybe_convert_objects(ndarray[object] objects,
seen.float_ = True
elif is_timedelta(val):
if convert_timedelta:
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
seen.timedelta_ = True
try:
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
except OutOfBoundsTimedelta:
seen.object_ = True
break
else:
seen.object_ = True
break
Expand Down Expand Up @@ -2407,8 +2419,12 @@ def maybe_convert_objects(ndarray[object] objects,
break
else:
seen.datetime_ = True
idatetimes[i] = convert_to_tsobject(
val, None, None, 0, 0).value
try:
idatetimes[i] = convert_to_tsobject(
val, None, None, 0, 0).value
except OutOfBoundsDatetime:
seen.object_ = True
break
else:
seen.object_ = True
break
Expand Down Expand Up @@ -2478,8 +2494,13 @@ def maybe_convert_objects(ndarray[object] objects,
elif seen.nat_:
if not seen.numeric_:
if convert_datetime and convert_timedelta:
# TODO: array full of NaT ambiguity resolve here needed
pass
dtype = dtype_if_all_nat
if dtype is not None:
# otherwise we keep object dtype
result = _infer_all_nats(
dtype, datetimes, timedeltas
)

elif convert_datetime:
result = datetimes
elif convert_timedelta:
Expand Down Expand Up @@ -2518,8 +2539,13 @@ def maybe_convert_objects(ndarray[object] objects,
elif seen.nat_:
if not seen.numeric_:
if convert_datetime and convert_timedelta:
# TODO: array full of NaT ambiguity resolve here needed
pass
dtype = dtype_if_all_nat
if dtype is not None:
# otherwise we keep object dtype
result = _infer_all_nats(
dtype, datetimes, timedeltas
)

elif convert_datetime:
result = datetimes
elif convert_timedelta:
Expand Down Expand Up @@ -2550,6 +2576,26 @@ def maybe_convert_objects(ndarray[object] objects,
return objects


cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type dtype & narrow the types on datetimes/timedeltas if possible.

also a return type ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really, as there is no good way to declare ndarray[dt64], and both the dtype arg and return type aren't representable here

If we have all-NaT values, cast these to the given dtype.

For a numpy dtype, ``datetimes`` is returned for "M8[ns]" and
``timedeltas`` for "m8[ns]"; any other numpy dtype raises ValueError.
For any other dtype object, an array is built from the class returned by
``dtype.construct_array_type()``, filled with NPY_NAT and with the same
length as ``datetimes``.
"""
# numpy dtype: reuse one of the two arrays passed in by the caller
if isinstance(dtype, np.dtype):
if dtype == "M8[ns]":
result = datetimes
elif dtype == "m8[ns]":
result = timedeltas
else:
# only the two nanosecond datetimelike numpy dtypes are handled here
raise ValueError(dtype)
else:
# ExtensionDtype
# build an int64 buffer holding NPY_NAT in every slot and hand it to
# the dtype's array class
cls = dtype.construct_array_type()
i8vals = np.empty(len(datetimes), dtype="i8")
i8vals.fill(NPY_NAT)
result = cls(i8vals, dtype=dtype)
return result


class NoDefault(Enum):
# We make this an Enum
# 1) because it round-trips through pickle correctly (see GH#40397)
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,45 @@ def test_maybe_convert_objects_datetime(self):
)
tm.assert_numpy_array_equal(out, exp)

def test_maybe_convert_objects_dtype_if_all_nat(self):
    """An all-NaT object array is only cast when ``dtype_if_all_nat`` is given."""
    all_nat = np.array([pd.NaT, pd.NaT], dtype=object)

    # no dtype_if_all_nat passed -> we don't guess; the input comes back as-is
    result = lib.maybe_convert_objects(
        all_nat, convert_datetime=True, convert_timedelta=True
    )
    tm.assert_numpy_array_equal(result, all_nat)

    # an explicit dtype settles the datetime-vs-timedelta ambiguity
    for target in ("timedelta64[ns]", "datetime64[ns]"):
        result = lib.maybe_convert_objects(
            all_nat,
            convert_datetime=True,
            convert_timedelta=True,
            dtype_if_all_nat=np.dtype(target),
        )
        expected = np.array(["NaT", "NaT"], dtype=target)
        tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
    """Values too large for the target dtype leave the object array untouched."""
    launch = datetime(2363, 10, 4)  # Enterprise-D launch date
    # for the timedelta case, use the offset of that date from 1970-01-01
    value = launch - datetime(1970, 1, 1) if dtype == "timedelta64[ns]" else launch
    boxed = np.array([value], dtype=object)

    result = lib.maybe_convert_objects(
        boxed, convert_datetime=True, convert_timedelta=True
    )
    # no OutOfBoundsDatetime/OutOfBoundsTimedelta is raised
    tm.assert_numpy_array_equal(result, boxed)

def test_maybe_convert_objects_timedelta64_nat(self):
obj = np.timedelta64("NaT", "ns")
arr = np.array([obj], dtype=object)
Expand Down