Skip to content

Commit aee55c9

Browse files
authored
REF: simplify maybe_convert_objects (#53021)
1 parent 80a3705 commit aee55c9

File tree

8 files changed

+37
-140
lines changed

8 files changed

+37
-140
lines changed

pandas/_libs/lib.pyi

+4-55
Original file line numberDiff line numberDiff line change
@@ -70,45 +70,25 @@ def map_infer(
7070
convert: bool = ...,
7171
ignore_na: bool = ...,
7272
) -> np.ndarray: ...
73-
@overload # all convert_foo False -> only convert numeric
73+
@overload
7474
def maybe_convert_objects(
7575
objects: npt.NDArray[np.object_],
7676
*,
7777
try_float: bool = ...,
7878
safe: bool = ...,
7979
convert_numeric: bool = ...,
80-
convert_datetime: Literal[False] = ...,
81-
convert_timedelta: Literal[False] = ...,
82-
convert_period: Literal[False] = ...,
83-
convert_interval: Literal[False] = ...,
80+
convert_non_numeric: Literal[False] = ...,
8481
convert_to_nullable_dtype: Literal[False] = ...,
8582
dtype_if_all_nat: DtypeObj | None = ...,
8683
) -> npt.NDArray[np.object_ | np.number]: ...
87-
@overload # both convert_datetime and convert_to_nullable_integer False -> np.ndarray
88-
def maybe_convert_objects(
89-
objects: npt.NDArray[np.object_],
90-
*,
91-
try_float: bool = ...,
92-
safe: bool = ...,
93-
convert_numeric: bool = ...,
94-
convert_datetime: Literal[False] = ...,
95-
convert_timedelta: bool = ...,
96-
convert_period: Literal[False] = ...,
97-
convert_interval: Literal[False] = ...,
98-
convert_to_nullable_dtype: Literal[False] = ...,
99-
dtype_if_all_nat: DtypeObj | None = ...,
100-
) -> np.ndarray: ...
10184
@overload
10285
def maybe_convert_objects(
10386
objects: npt.NDArray[np.object_],
10487
*,
10588
try_float: bool = ...,
10689
safe: bool = ...,
10790
convert_numeric: bool = ...,
108-
convert_datetime: bool = ...,
109-
convert_timedelta: bool = ...,
110-
convert_period: bool = ...,
111-
convert_interval: bool = ...,
91+
convert_non_numeric: bool = ...,
11292
convert_to_nullable_dtype: Literal[True] = ...,
11393
dtype_if_all_nat: DtypeObj | None = ...,
11494
) -> ArrayLike: ...
@@ -119,38 +99,7 @@ def maybe_convert_objects(
11999
try_float: bool = ...,
120100
safe: bool = ...,
121101
convert_numeric: bool = ...,
122-
convert_datetime: Literal[True] = ...,
123-
convert_timedelta: bool = ...,
124-
convert_period: bool = ...,
125-
convert_interval: bool = ...,
126-
convert_to_nullable_dtype: bool = ...,
127-
dtype_if_all_nat: DtypeObj | None = ...,
128-
) -> ArrayLike: ...
129-
@overload
130-
def maybe_convert_objects(
131-
objects: npt.NDArray[np.object_],
132-
*,
133-
try_float: bool = ...,
134-
safe: bool = ...,
135-
convert_numeric: bool = ...,
136-
convert_datetime: bool = ...,
137-
convert_timedelta: bool = ...,
138-
convert_period: Literal[True] = ...,
139-
convert_interval: bool = ...,
140-
convert_to_nullable_dtype: bool = ...,
141-
dtype_if_all_nat: DtypeObj | None = ...,
142-
) -> ArrayLike: ...
143-
@overload
144-
def maybe_convert_objects(
145-
objects: npt.NDArray[np.object_],
146-
*,
147-
try_float: bool = ...,
148-
safe: bool = ...,
149-
convert_numeric: bool = ...,
150-
convert_datetime: bool = ...,
151-
convert_timedelta: bool = ...,
152-
convert_period: bool = ...,
153-
convert_interval: bool = ...,
102+
convert_non_numeric: bool = ...,
154103
convert_to_nullable_dtype: bool = ...,
155104
dtype_if_all_nat: DtypeObj | None = ...,
156105
) -> ArrayLike: ...

pandas/_libs/lib.pyx

+12-36
Original file line numberDiff line numberDiff line change
@@ -2396,11 +2396,8 @@ def maybe_convert_objects(ndarray[object] objects,
23962396
bint try_float=False,
23972397
bint safe=False,
23982398
bint convert_numeric=True, # NB: different default!
2399-
bint convert_datetime=False,
2400-
bint convert_timedelta=False,
2401-
bint convert_period=False,
2402-
bint convert_interval=False,
24032399
bint convert_to_nullable_dtype=False,
2400+
bint convert_non_numeric=False,
24042401
object dtype_if_all_nat=None) -> "ArrayLike":
24052402
"""
24062403
Type inference function-- convert object array to proper dtype
@@ -2417,21 +2414,11 @@ def maybe_convert_objects(ndarray[object] objects,
24172414
True, no upcasting will be performed.
24182415
convert_numeric : bool, default True
24192416
Whether to convert numeric entries.
2420-
convert_datetime : bool, default False
2421-
If an array-like object contains only datetime values or NaT is
2422-
encountered, whether to convert and return an array of M8[ns] dtype.
2423-
convert_timedelta : bool, default False
2424-
If an array-like object contains only timedelta values or NaT is
2425-
encountered, whether to convert and return an array of m8[ns] dtype.
2426-
convert_period : bool, default False
2427-
If an array-like object contains only (homogeneous-freq) Period values
2428-
or NaT, whether to convert and return a PeriodArray.
2429-
convert_interval : bool, default False
2430-
If an array-like object contains only Interval objects (with matching
2431-
dtypes and closedness) or NaN, whether to convert to IntervalArray.
24322417
convert_to_nullable_dtype : bool, default False
24332418
If an array-like object contains only integer or boolean values (and NaN),
24342419
whether to convert and return a Boolean/IntegerArray.
2420+
convert_non_numeric : bool, default False
2421+
Whether to convert datetime, timedelta, period, and interval types.
24352422
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
24362423
Dtype to cast to if we have all-NaT.
24372424

@@ -2454,12 +2441,11 @@ def maybe_convert_objects(ndarray[object] objects,
24542441

24552442
if dtype_if_all_nat is not None:
24562443
# in practice we don't expect to ever pass dtype_if_all_nat
2457-
# without both convert_datetime and convert_timedelta, so disallow
2444+
# without convert_non_numeric=True, so disallow
24582445
# it to avoid needing to handle it below.
2459-
if not convert_datetime or not convert_timedelta:
2446+
if not convert_non_numeric:
24602447
raise ValueError(
2461-
"Cannot specify 'dtype_if_all_nat' without convert_datetime=True "
2462-
"and convert_timedelta=True"
2448+
"Cannot specify 'dtype_if_all_nat' without convert_non_numeric=True"
24632449
)
24642450

24652451
n = len(objects)
@@ -2484,7 +2470,7 @@ def maybe_convert_objects(ndarray[object] objects,
24842470
mask[i] = True
24852471
elif val is NaT:
24862472
seen.nat_ = True
2487-
if not (convert_datetime or convert_timedelta or convert_period):
2473+
if not convert_non_numeric:
24882474
seen.object_ = True
24892475
break
24902476
elif util.is_nan(val):
@@ -2502,7 +2488,7 @@ def maybe_convert_objects(ndarray[object] objects,
25022488
if not convert_numeric:
25032489
break
25042490
elif is_timedelta(val):
2505-
if convert_timedelta:
2491+
if convert_non_numeric:
25062492
seen.timedelta_ = True
25072493
try:
25082494
convert_to_timedelta64(val, "ns")
@@ -2543,7 +2529,7 @@ def maybe_convert_objects(ndarray[object] objects,
25432529
elif PyDateTime_Check(val) or util.is_datetime64_object(val):
25442530

25452531
# if we have a tz attached then return the objects
2546-
if convert_datetime:
2532+
if convert_non_numeric:
25472533
if getattr(val, "tzinfo", None) is not None:
25482534
seen.datetimetz_ = True
25492535
break
@@ -2559,7 +2545,7 @@ def maybe_convert_objects(ndarray[object] objects,
25592545
seen.object_ = True
25602546
break
25612547
elif is_period_object(val):
2562-
if convert_period:
2548+
if convert_non_numeric:
25632549
seen.period_ = True
25642550
break
25652551
else:
@@ -2575,7 +2561,7 @@ def maybe_convert_objects(ndarray[object] objects,
25752561
seen.object_ = True
25762562
break
25772563
elif is_interval(val):
2578-
if convert_interval:
2564+
if convert_non_numeric:
25792565
seen.interval_ = True
25802566
break
25812567
else:
@@ -2661,18 +2647,8 @@ def maybe_convert_objects(ndarray[object] objects,
26612647
elif dtype is not None:
26622648
# EA, we don't expect to get here, but _could_ implement
26632649
raise NotImplementedError(dtype)
2664-
elif convert_datetime and convert_timedelta:
2665-
# we don't guess
2666-
seen.object_ = True
2667-
elif convert_datetime:
2668-
res = np.empty((<object>objects).shape, dtype="M8[ns]")
2669-
res[:] = NPY_NAT
2670-
return res
2671-
elif convert_timedelta:
2672-
res = np.empty((<object>objects).shape, dtype="m8[ns]")
2673-
res[:] = NPY_NAT
2674-
return res
26752650
else:
2651+
# we don't guess
26762652
seen.object_ = True
26772653
else:
26782654
seen.object_ = True

pandas/core/dtypes/cast.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -1171,10 +1171,7 @@ def maybe_infer_to_datetimelike(
11711171
# Here we do not convert numeric dtypes, as if we wanted that,
11721172
# numpy would have done it for us.
11731173
convert_numeric=False,
1174-
convert_period=True,
1175-
convert_interval=True,
1176-
convert_timedelta=True,
1177-
convert_datetime=True,
1174+
convert_non_numeric=True,
11781175
dtype_if_all_nat=np.dtype("M8[ns]"),
11791176
)
11801177

pandas/core/indexes/base.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -6754,10 +6754,7 @@ def infer_objects(self, copy: bool = True) -> Index:
67546754
values = cast("npt.NDArray[np.object_]", values)
67556755
res_values = lib.maybe_convert_objects(
67566756
values,
6757-
convert_datetime=True,
6758-
convert_timedelta=True,
6759-
convert_period=True,
6760-
convert_interval=True,
6757+
convert_non_numeric=True,
67616758
)
67626759
if copy and res_values is values:
67636760
return self.copy()

pandas/core/internals/array_manager.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -390,10 +390,7 @@ def _convert(arr):
390390
arr = np.asarray(arr)
391391
result = lib.maybe_convert_objects(
392392
arr,
393-
convert_datetime=True,
394-
convert_timedelta=True,
395-
convert_period=True,
396-
convert_interval=True,
393+
convert_non_numeric=True,
397394
)
398395
if result is arr and copy:
399396
return arr.copy()

pandas/core/internals/blocks.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -2284,10 +2284,7 @@ def convert(
22842284

22852285
res_values = lib.maybe_convert_objects(
22862286
values,
2287-
convert_datetime=True,
2288-
convert_timedelta=True,
2289-
convert_period=True,
2290-
convert_interval=True,
2287+
convert_non_numeric=True,
22912288
)
22922289
refs = None
22932290
if copy and res_values is values:

pandas/core/internals/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1017,8 +1017,8 @@ def convert(arr):
10171017
# 1) we DO get here when arr is all Timestamps and dtype=None
10181018
# 2) disabling this doesn't break the world, so this must be
10191019
# getting caught at a higher level
1020-
# 3) passing convert_datetime to maybe_convert_objects get this right
1021-
# 4) convert_timedelta?
1020+
# 3) passing convert_non_numeric to maybe_convert_objects gets this right
1021+
# 4) convert_non_numeric?
10221022

10231023
if dtype is None:
10241024
if arr.dtype == np.dtype("O"):

pandas/tests/dtypes/test_inference.py

+15-31
Original file line numberDiff line numberDiff line change
@@ -727,17 +727,15 @@ def test_maybe_convert_objects_nat_inference(self, val, dtype):
727727
vals = np.array([pd.NaT, val], dtype=object)
728728
result = lib.maybe_convert_objects(
729729
vals,
730-
convert_datetime=True,
731-
convert_timedelta=True,
730+
convert_non_numeric=True,
732731
dtype_if_all_nat=dtype,
733732
)
734733
assert result.dtype == dtype
735734
assert np.isnat(result).all()
736735

737736
result = lib.maybe_convert_objects(
738737
vals[::-1],
739-
convert_datetime=True,
740-
convert_timedelta=True,
738+
convert_non_numeric=True,
741739
dtype_if_all_nat=dtype,
742740
)
743741
assert result.dtype == dtype
@@ -777,47 +775,37 @@ def test_maybe_convert_objects_datetime(self):
777775
[np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object
778776
)
779777
exp = arr.copy()
780-
out = lib.maybe_convert_objects(
781-
arr, convert_datetime=True, convert_timedelta=True
782-
)
778+
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
783779
tm.assert_numpy_array_equal(out, exp)
784780

785781
arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
786782
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
787-
out = lib.maybe_convert_objects(
788-
arr, convert_datetime=True, convert_timedelta=True
789-
)
783+
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
790784
tm.assert_numpy_array_equal(out, exp)
791785

792-
# with convert_timedelta=True, the nan is a valid NA value for td64
786+
# with convert_non_numeric=True, the nan is a valid NA value for td64
793787
arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object)
794788
exp = exp[::-1]
795-
out = lib.maybe_convert_objects(
796-
arr, convert_datetime=True, convert_timedelta=True
797-
)
789+
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
798790
tm.assert_numpy_array_equal(out, exp)
799791

800792
def test_maybe_convert_objects_dtype_if_all_nat(self):
801793
arr = np.array([pd.NaT, pd.NaT], dtype=object)
802-
out = lib.maybe_convert_objects(
803-
arr, convert_datetime=True, convert_timedelta=True
804-
)
794+
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
805795
# no dtype_if_all_nat passed -> we don't guess
806796
tm.assert_numpy_array_equal(out, arr)
807797

808798
out = lib.maybe_convert_objects(
809799
arr,
810-
convert_datetime=True,
811-
convert_timedelta=True,
800+
convert_non_numeric=True,
812801
dtype_if_all_nat=np.dtype("timedelta64[ns]"),
813802
)
814803
exp = np.array(["NaT", "NaT"], dtype="timedelta64[ns]")
815804
tm.assert_numpy_array_equal(out, exp)
816805

817806
out = lib.maybe_convert_objects(
818807
arr,
819-
convert_datetime=True,
820-
convert_timedelta=True,
808+
convert_non_numeric=True,
821809
dtype_if_all_nat=np.dtype("datetime64[ns]"),
822810
)
823811
exp = np.array(["NaT", "NaT"], dtype="datetime64[ns]")
@@ -830,8 +818,7 @@ def test_maybe_convert_objects_dtype_if_all_nat_invalid(self):
830818
with pytest.raises(ValueError, match="int64"):
831819
lib.maybe_convert_objects(
832820
arr,
833-
convert_datetime=True,
834-
convert_timedelta=True,
821+
convert_non_numeric=True,
835822
dtype_if_all_nat=np.dtype("int64"),
836823
)
837824

@@ -842,9 +829,7 @@ def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
842829
stamp = stamp - datetime(1970, 1, 1)
843830
arr = np.array([stamp], dtype=object)
844831

845-
out = lib.maybe_convert_objects(
846-
arr, convert_datetime=True, convert_timedelta=True
847-
)
832+
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
848833
# no OutOfBoundsDatetime/OutOfBoundsTimedeltas
849834
tm.assert_numpy_array_equal(out, arr)
850835

@@ -855,15 +840,15 @@ def test_maybe_convert_objects_mixed_datetimes(self):
855840
for data in itertools.permutations(vals):
856841
data = np.array(list(data), dtype=object)
857842
expected = DatetimeIndex(data)._data._ndarray
858-
result = lib.maybe_convert_objects(data, convert_datetime=True)
843+
result = lib.maybe_convert_objects(data, convert_non_numeric=True)
859844
tm.assert_numpy_array_equal(result, expected)
860845

861846
def test_maybe_convert_objects_timedelta64_nat(self):
862847
obj = np.timedelta64("NaT", "ns")
863848
arr = np.array([obj], dtype=object)
864849
assert arr[0] is obj
865850

866-
result = lib.maybe_convert_objects(arr, convert_timedelta=True)
851+
result = lib.maybe_convert_objects(arr, convert_non_numeric=True)
867852

868853
expected = np.array([obj], dtype="m8[ns]")
869854
tm.assert_numpy_array_equal(result, expected)
@@ -1037,7 +1022,7 @@ def test_maybe_convert_objects_itemsize(self, data0, data1):
10371022
def test_mixed_dtypes_remain_object_array(self):
10381023
# GH14956
10391024
arr = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)
1040-
result = lib.maybe_convert_objects(arr, convert_datetime=True)
1025+
result = lib.maybe_convert_objects(arr, convert_non_numeric=True)
10411026
tm.assert_numpy_array_equal(result, arr)
10421027

10431028
@pytest.mark.parametrize(
@@ -1050,8 +1035,7 @@ def test_mixed_dtypes_remain_object_array(self):
10501035
def test_maybe_convert_objects_ea(self, idx):
10511036
result = lib.maybe_convert_objects(
10521037
np.array(idx, dtype=object),
1053-
convert_period=True,
1054-
convert_interval=True,
1038+
convert_non_numeric=True,
10551039
)
10561040
tm.assert_extension_array_equal(result, idx._data)
10571041

0 commit comments

Comments
 (0)