Skip to content

Commit 50b4df3

Browse files
authored
PERF: short-circuit allocations in infer_dtype, ensure_datetime64ns (#44971)
1 parent 4735c0f commit 50b4df3

File tree

2 files changed

+28
-24
lines changed

2 files changed

+28
-24
lines changed

pandas/_libs/lib.pyx

+11 -8
Original file line number | Diff line number | Diff line change
@@ -1447,25 +1447,28 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
14471447
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
14481448
values = construct_1d_object_array_from_listlike(value)
14491449

1450-
# make contiguous
1451-
# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
1452-
values = values.ravel(order="K")
1453-
14541450
val = _try_infer_map(values.dtype)
14551451
if val is not None:
1452+
# Anything other than object-dtype should return here.
14561453
return val
14571454

1458-
if values.dtype != np.object_:
1459-
values = values.astype("O")
1455+
if values.descr.type_num != NPY_OBJECT:
1456+
# i.e. values.dtype != np.object_
1457+
# This should not be reached
1458+
values = values.astype(object)
1459+
1460+
# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
1461+
values = values.ravel(order="K")
14601462

14611463
if skipna:
14621464
values = values[~isnaobj(values)]
14631465

1464-
n = len(values)
1466+
n = cnp.PyArray_SIZE(values)
14651467
if n == 0:
14661468
return "empty"
14671469

1468-
# try to use a valid value
1470+
# Iterate until we find our first valid value. We will use this
1471+
# value to decide which of the is_foo_array functions to call.
14691472
for i in range(n):
14701473
val = values[i]
14711474

pandas/_libs/tslibs/conversion.pyx

+17 -16
Original file line number | Diff line number | Diff line change
@@ -227,12 +227,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
227227
dtype = arr.dtype
228228
arr = arr.astype(dtype.newbyteorder("<"))
229229

230-
ivalues = arr.view(np.int64).ravel("K")
231-
232-
result = np.empty_like(arr, dtype=DT64NS_DTYPE)
233-
iresult = result.ravel("K").view(np.int64)
234-
235-
if len(iresult) == 0:
230+
if arr.size == 0:
236231
result = arr.view(DT64NS_DTYPE)
237232
if copy:
238233
result = result.copy()
@@ -245,17 +240,23 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
245240
raise ValueError("datetime64/timedelta64 must have a unit specified")
246241

247242
if unit == NPY_FR_ns:
243+
# Check this before allocating result for perf, might save some memory
248244
if copy:
249-
arr = arr.copy()
250-
result = arr
251-
else:
252-
for i in range(n):
253-
if ivalues[i] != NPY_NAT:
254-
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
255-
iresult[i] = dtstruct_to_dt64(&dts)
256-
check_dts_bounds(&dts)
257-
else:
258-
iresult[i] = NPY_NAT
245+
return arr.copy()
246+
return arr
247+
248+
ivalues = arr.view(np.int64).ravel("K")
249+
250+
result = np.empty_like(arr, dtype=DT64NS_DTYPE)
251+
iresult = result.ravel("K").view(np.int64)
252+
253+
for i in range(n):
254+
if ivalues[i] != NPY_NAT:
255+
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
256+
iresult[i] = dtstruct_to_dt64(&dts)
257+
check_dts_bounds(&dts)
258+
else:
259+
iresult[i] = NPY_NAT
259260

260261
return result
261262

0 commit comments

Comments
 (0)