Skip to content

Commit e186e18

Browse files
authored
ENH: maybe_convert_objects corner cases (#41714)
1 parent 3eb26c7 commit e186e18

File tree

3 files changed

+115
-10
lines changed

3 files changed

+115
-10
lines changed

pandas/_libs/lib.pyi

+9-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@ from typing import (
1111

1212
import numpy as np
1313

14-
from pandas._typing import ArrayLike
14+
from pandas._typing import (
15+
ArrayLike,
16+
DtypeObj,
17+
)
1518

1619
# placeholder until we can specify np.ndarray[object, ndim=2]
1720
ndarray_obj_2d = np.ndarray
@@ -73,6 +76,7 @@ def maybe_convert_objects(
7376
convert_timedelta: bool = ...,
7477
convert_period: Literal[False] = ...,
7578
convert_to_nullable_integer: Literal[False] = ...,
79+
dtype_if_all_nat: DtypeObj | None = ...,
7680
) -> np.ndarray: ...
7781

7882
@overload
@@ -85,6 +89,7 @@ def maybe_convert_objects(
8589
convert_timedelta: bool = ...,
8690
convert_period: bool = ...,
8791
convert_to_nullable_integer: Literal[True] = ...,
92+
dtype_if_all_nat: DtypeObj | None = ...,
8893
) -> ArrayLike: ...
8994

9095
@overload
@@ -97,6 +102,7 @@ def maybe_convert_objects(
97102
convert_timedelta: bool = ...,
98103
convert_period: bool = ...,
99104
convert_to_nullable_integer: bool = ...,
105+
dtype_if_all_nat: DtypeObj | None = ...,
100106
) -> ArrayLike: ...
101107

102108
@overload
@@ -109,6 +115,7 @@ def maybe_convert_objects(
109115
convert_timedelta: bool = ...,
110116
convert_period: Literal[True] = ...,
111117
convert_to_nullable_integer: bool = ...,
118+
dtype_if_all_nat: DtypeObj | None = ...,
112119
) -> ArrayLike: ...
113120

114121
@overload
@@ -121,6 +128,7 @@ def maybe_convert_objects(
121128
convert_timedelta: bool = ...,
122129
convert_period: bool = ...,
123130
convert_to_nullable_integer: bool = ...,
131+
dtype_if_all_nat: DtypeObj | None = ...,
124132
) -> ArrayLike: ...
125133

126134
@overload

pandas/_libs/lib.pyx

+55-9
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ from pandas._libs.util cimport (
8484
)
8585

8686
from pandas._libs.tslib import array_to_datetime
87+
from pandas._libs.tslibs import (
88+
OutOfBoundsDatetime,
89+
OutOfBoundsTimedelta,
90+
)
8791
from pandas._libs.tslibs.period import Period
8892

8993
from pandas._libs.missing cimport (
@@ -1652,7 +1656,8 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
16521656
# convert *every* string array
16531657
if len(objs):
16541658
try:
1655-
array_to_datetime(objs, errors="raise")
1659+
# require_iso8601 as in maybe_infer_to_datetimelike
1660+
array_to_datetime(objs, errors="raise", require_iso8601=True)
16561661
return "datetime", seen_str
16571662
except (ValueError, TypeError):
16581663
pass
@@ -2334,7 +2339,8 @@ def maybe_convert_objects(ndarray[object] objects,
23342339
bint convert_timedelta=False,
23352340
bint convert_period=False,
23362341
bint convert_interval=False,
2337-
bint convert_to_nullable_integer=False) -> "ArrayLike":
2342+
bint convert_to_nullable_integer=False,
2343+
object dtype_if_all_nat=None) -> "ArrayLike":
23382344
"""
23392345
Type inference function-- convert object array to proper dtype
23402346

@@ -2363,6 +2369,8 @@ def maybe_convert_objects(ndarray[object] objects,
23632369
convert_to_nullable_integer : bool, default False
23642370
If an array-like object containing only integer values (and NaN) is
23652371
encountered, whether to convert and return an IntegerArray.
2372+
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
2373+
Dtype to cast to if we have all-NaT.
23662374

23672375
Returns
23682376
-------
@@ -2431,8 +2439,12 @@ def maybe_convert_objects(ndarray[object] objects,
24312439
seen.float_ = True
24322440
elif is_timedelta(val):
24332441
if convert_timedelta:
2434-
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
24352442
seen.timedelta_ = True
2443+
try:
2444+
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
2445+
except OutOfBoundsTimedelta:
2446+
seen.object_ = True
2447+
break
24362448
else:
24372449
seen.object_ = True
24382450
break
@@ -2469,8 +2481,12 @@ def maybe_convert_objects(ndarray[object] objects,
24692481
break
24702482
else:
24712483
seen.datetime_ = True
2472-
idatetimes[i] = convert_to_tsobject(
2473-
val, None, None, 0, 0).value
2484+
try:
2485+
idatetimes[i] = convert_to_tsobject(
2486+
val, None, None, 0, 0).value
2487+
except OutOfBoundsDatetime:
2488+
seen.object_ = True
2489+
break
24742490
else:
24752491
seen.object_ = True
24762492
break
@@ -2558,8 +2574,13 @@ def maybe_convert_objects(ndarray[object] objects,
25582574
elif seen.nat_:
25592575
if not seen.numeric_:
25602576
if convert_datetime and convert_timedelta:
2561-
# TODO: array full of NaT ambiguity resolve here needed
2562-
pass
2577+
dtype = dtype_if_all_nat
2578+
if dtype is not None:
2579+
# otherwise we keep object dtype
2580+
result = _infer_all_nats(
2581+
dtype, datetimes, timedeltas
2582+
)
2583+
25632584
elif convert_datetime:
25642585
result = datetimes
25652586
elif convert_timedelta:
@@ -2598,8 +2619,13 @@ def maybe_convert_objects(ndarray[object] objects,
25982619
elif seen.nat_:
25992620
if not seen.numeric_:
26002621
if convert_datetime and convert_timedelta:
2601-
# TODO: array full of NaT ambiguity resolve here needed
2602-
pass
2622+
dtype = dtype_if_all_nat
2623+
if dtype is not None:
2624+
# otherwise we keep object dtype
2625+
result = _infer_all_nats(
2626+
dtype, datetimes, timedeltas
2627+
)
2628+
26032629
elif convert_datetime:
26042630
result = datetimes
26052631
elif convert_timedelta:
@@ -2630,6 +2656,26 @@ def maybe_convert_objects(ndarray[object] objects,
26302656
return objects
26312657

26322658

2659+
cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
2660+
"""
2661+
If we have all-NaT values, cast these to the given dtype.
2662+
"""
2663+
if isinstance(dtype, np.dtype):
2664+
if dtype == "M8[ns]":
2665+
result = datetimes
2666+
elif dtype == "m8[ns]":
2667+
result = timedeltas
2668+
else:
2669+
raise ValueError(dtype)
2670+
else:
2671+
# ExtensionDtype
2672+
cls = dtype.construct_array_type()
2673+
i8vals = np.empty(len(datetimes), dtype="i8")
2674+
i8vals.fill(NPY_NAT)
2675+
result = cls(i8vals, dtype=dtype)
2676+
return result
2677+
2678+
26332679
class NoDefault(Enum):
26342680
# We make this an Enum
26352681
# 1) because it round-trips through pickle correctly (see GH#40397)

pandas/tests/dtypes/test_inference.py

+51
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,57 @@ def test_maybe_convert_objects_datetime(self):
665665
)
666666
tm.assert_numpy_array_equal(out, exp)
667667

668+
def test_maybe_convert_objects_dtype_if_all_nat(self):
669+
arr = np.array([pd.NaT, pd.NaT], dtype=object)
670+
out = lib.maybe_convert_objects(
671+
arr, convert_datetime=True, convert_timedelta=True
672+
)
673+
# no dtype_if_all_nat passed -> we don't guess
674+
tm.assert_numpy_array_equal(out, arr)
675+
676+
out = lib.maybe_convert_objects(
677+
arr,
678+
convert_datetime=True,
679+
convert_timedelta=True,
680+
dtype_if_all_nat=np.dtype("timedelta64[ns]"),
681+
)
682+
exp = np.array(["NaT", "NaT"], dtype="timedelta64[ns]")
683+
tm.assert_numpy_array_equal(out, exp)
684+
685+
out = lib.maybe_convert_objects(
686+
arr,
687+
convert_datetime=True,
688+
convert_timedelta=True,
689+
dtype_if_all_nat=np.dtype("datetime64[ns]"),
690+
)
691+
exp = np.array(["NaT", "NaT"], dtype="datetime64[ns]")
692+
tm.assert_numpy_array_equal(out, exp)
693+
694+
def test_maybe_convert_objects_dtype_if_all_nat_invalid(self):
695+
# we accept datetime64[ns], timedelta64[ns], and EADtype
696+
arr = np.array([pd.NaT, pd.NaT], dtype=object)
697+
698+
with pytest.raises(ValueError, match="int64"):
699+
lib.maybe_convert_objects(
700+
arr,
701+
convert_datetime=True,
702+
convert_timedelta=True,
703+
dtype_if_all_nat=np.dtype("int64"),
704+
)
705+
706+
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
707+
def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
708+
stamp = datetime(2363, 10, 4) # Enterprise-D launch date
709+
if dtype == "timedelta64[ns]":
710+
stamp = stamp - datetime(1970, 1, 1)
711+
arr = np.array([stamp], dtype=object)
712+
713+
out = lib.maybe_convert_objects(
714+
arr, convert_datetime=True, convert_timedelta=True
715+
)
716+
# no OutOfBoundsDatetime/OutOfBoundsTimedelta raised
717+
tm.assert_numpy_array_equal(out, arr)
718+
668719
def test_maybe_convert_objects_timedelta64_nat(self):
669720
obj = np.timedelta64("NaT", "ns")
670721
arr = np.array([obj], dtype=object)

0 commit comments

Comments
 (0)