Skip to content

Commit 4117f98

Browse files
authored
REF: simplify maybe_convert_objects (pandas-dev#49588)
1 parent 5a36b5f commit 4117f98

File tree

2 files changed

+55
-89
lines changed

2 files changed

+55
-89
lines changed

pandas/_libs/lib.pyx

+49 -81
Original file line number | Diff line number | Diff line change
@@ -1284,13 +1284,12 @@ cdef class Seen:
12841284

12851285
@property
12861286
def is_bool(self):
1287-
return not (self.datetime_ or self.numeric_ or self.timedelta_
1288-
or self.nat_)
1289-
1290-
@property
1291-
def is_float_or_complex(self):
1292-
return not (self.bool_ or self.datetime_ or self.timedelta_
1293-
or self.nat_)
1287+
# i.e. not (anything but bool)
1288+
return not (
1289+
self.datetime_ or self.datetimetz_ or self.timedelta_ or self.nat_
1290+
or self.period_ or self.interval_
1291+
or self.numeric_ or self.nan_ or self.null_ or self.object_
1292+
)
12941293

12951294

12961295
cdef object _try_infer_map(object dtype):
@@ -2448,8 +2447,6 @@ def maybe_convert_objects(ndarray[object] objects,
24482447
ndarray[int64_t] ints
24492448
ndarray[uint64_t] uints
24502449
ndarray[uint8_t] bools
2451-
int64_t[::1] idatetimes
2452-
int64_t[::1] itimedeltas
24532450
Seen seen = Seen()
24542451
object val
24552452
float64_t fnan = np.nan
@@ -2473,14 +2470,6 @@ def maybe_convert_objects(ndarray[object] objects,
24732470
bools = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_UINT8, 0)
24742471
mask = np.full(n, False)
24752472

2476-
if convert_datetime:
2477-
datetimes = np.empty(n, dtype='M8[ns]')
2478-
idatetimes = datetimes.view(np.int64)
2479-
2480-
if convert_timedelta:
2481-
timedeltas = np.empty(n, dtype='m8[ns]')
2482-
itimedeltas = timedeltas.view(np.int64)
2483-
24842473
for i in range(n):
24852474
val = objects[i]
24862475
if itemsize_max != -1:
@@ -2494,10 +2483,6 @@ def maybe_convert_objects(ndarray[object] objects,
24942483
mask[i] = True
24952484
elif val is NaT:
24962485
seen.nat_ = True
2497-
if convert_datetime:
2498-
idatetimes[i] = NPY_NAT
2499-
if convert_timedelta:
2500-
itimedeltas[i] = NPY_NAT
25012486
if not (convert_datetime or convert_timedelta or convert_period):
25022487
seen.object_ = True
25032488
break
@@ -2515,7 +2500,7 @@ def maybe_convert_objects(ndarray[object] objects,
25152500
if convert_timedelta:
25162501
seen.timedelta_ = True
25172502
try:
2518-
itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
2503+
convert_to_timedelta64(val, "ns")
25192504
except OutOfBoundsTimedelta:
25202505
seen.object_ = True
25212506
break
@@ -2556,8 +2541,7 @@ def maybe_convert_objects(ndarray[object] objects,
25562541
else:
25572542
seen.datetime_ = True
25582543
try:
2559-
idatetimes[i] = convert_to_tsobject(
2560-
val, None, None, 0, 0).value
2544+
convert_to_tsobject(val, None, None, 0, 0)
25612545
except OutOfBoundsDatetime:
25622546
seen.object_ = True
25632547
break
@@ -2683,76 +2667,60 @@ def maybe_convert_objects(ndarray[object] objects,
26832667
else:
26842668
seen.object_ = True
26852669

2670+
if seen.bool_:
2671+
if seen.is_bool:
2672+
# is_bool property rules out everything else
2673+
return bools.view(np.bool_)
2674+
seen.object_ = True
2675+
26862676
if not seen.object_:
26872677
result = None
26882678
if not safe:
26892679
if seen.null_ or seen.nan_:
2690-
if seen.is_float_or_complex:
2691-
if seen.complex_:
2692-
result = complexes
2693-
elif seen.float_:
2694-
result = floats
2695-
elif seen.int_:
2696-
if convert_to_nullable_integer:
2697-
from pandas.core.arrays import IntegerArray
2698-
result = IntegerArray(ints, mask)
2699-
else:
2700-
result = floats
2701-
elif seen.nan_:
2680+
if seen.complex_:
2681+
result = complexes
2682+
elif seen.float_:
2683+
result = floats
2684+
elif seen.int_:
2685+
if convert_to_nullable_integer:
2686+
from pandas.core.arrays import IntegerArray
2687+
result = IntegerArray(ints, mask)
2688+
else:
27022689
result = floats
2690+
elif seen.nan_:
2691+
result = floats
27032692
else:
2704-
if not seen.bool_:
2705-
if seen.datetime_:
2706-
if not seen.numeric_ and not seen.timedelta_:
2707-
result = datetimes
2708-
elif seen.timedelta_:
2709-
if not seen.numeric_:
2710-
result = timedeltas
2693+
if seen.complex_:
2694+
result = complexes
2695+
elif seen.float_:
2696+
result = floats
2697+
elif seen.int_:
2698+
if seen.uint_:
2699+
result = uints
27112700
else:
2712-
if seen.complex_:
2713-
result = complexes
2714-
elif seen.float_:
2715-
result = floats
2716-
elif seen.int_:
2717-
if seen.uint_:
2718-
result = uints
2719-
else:
2720-
result = ints
2721-
elif seen.is_bool:
2722-
result = bools.view(np.bool_)
2701+
result = ints
27232702

27242703
else:
27252704
# don't cast int to float, etc.
27262705
if seen.null_:
2727-
if seen.is_float_or_complex:
2728-
if seen.complex_:
2729-
if not seen.int_:
2730-
result = complexes
2731-
elif seen.float_ or seen.nan_:
2732-
if not seen.int_:
2733-
result = floats
2706+
if seen.complex_:
2707+
if not seen.int_:
2708+
result = complexes
2709+
elif seen.float_ or seen.nan_:
2710+
if not seen.int_:
2711+
result = floats
27342712
else:
2735-
if not seen.bool_:
2736-
if seen.datetime_:
2737-
if not seen.numeric_ and not seen.timedelta_:
2738-
result = datetimes
2739-
elif seen.timedelta_:
2740-
if not seen.numeric_:
2741-
result = timedeltas
2713+
if seen.complex_:
2714+
if not seen.int_:
2715+
result = complexes
2716+
elif seen.float_ or seen.nan_:
2717+
if not seen.int_:
2718+
result = floats
2719+
elif seen.int_:
2720+
if seen.uint_:
2721+
result = uints
27422722
else:
2743-
if seen.complex_:
2744-
if not seen.int_:
2745-
result = complexes
2746-
elif seen.float_ or seen.nan_:
2747-
if not seen.int_:
2748-
result = floats
2749-
elif seen.int_:
2750-
if seen.uint_:
2751-
result = uints
2752-
else:
2753-
result = ints
2754-
elif seen.is_bool and not seen.nan_:
2755-
result = bools.view(np.bool_)
2723+
result = ints
27562724

27572725
if result is uints or result is ints or result is floats or result is complexes:
27582726
# cast to the largest itemsize when all values are NumPy scalars

pandas/core/dtypes/cast.py

+6 -8
Original file line number | Diff line number | Diff line change
@@ -1192,17 +1192,15 @@ def maybe_infer_to_datetimelike(
11921192
if not isinstance(value, np.ndarray) or value.dtype != object:
11931193
# Caller is responsible for passing only ndarray[object]
11941194
raise TypeError(type(value)) # pragma: no cover
1195+
if value.ndim != 1:
1196+
# Caller is responsible
1197+
raise ValueError(value.ndim) # pragma: no cover
11951198

1196-
v = np.array(value, copy=False)
1197-
1198-
if v.ndim != 1:
1199-
v = v.ravel()
1200-
1201-
if not len(v):
1199+
if not len(value):
12021200
return value
12031201

12041202
out = lib.maybe_convert_objects(
1205-
v,
1203+
value,
12061204
convert_period=True,
12071205
convert_interval=True,
12081206
convert_timedelta=True,
@@ -1213,7 +1211,7 @@ def maybe_infer_to_datetimelike(
12131211
# Here we do not convert numeric dtypes, as if we wanted that,
12141212
# numpy would have done it for us.
12151213
# See also _maybe_cast_data_without_dtype
1216-
return v
1214+
return value
12171215
# Incompatible return value type (got "Union[ExtensionArray, ndarray[Any, Any]]",
12181216
# expected "Union[ndarray[Any, Any], DatetimeArray, TimedeltaArray, PeriodArray,
12191217
# IntervalArray]")

0 commit comments

Comments
 (0)