Skip to content

Commit 974cee1

Browse files
jbrockmendel authored and yehoshuadimarsky committed
PERF: avoid potentially-copying ravel (pandas-dev#46733)
1 parent 58f3fb8 commit 974cee1

File tree

3 files changed

+26
-12
lines changed

3 files changed

+26
-12
lines changed

pandas/_libs/tslibs/conversion.pyx

+22 -8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import cython
22
import numpy as np
33

44
cimport numpy as cnp
5+
from cpython.object cimport PyObject
56
from numpy cimport (
67
int32_t,
78
int64_t,
@@ -273,7 +274,8 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
273274

274275
@cython.boundscheck(False)
275276
@cython.wraparound(False)
276-
def datetime_to_datetime64(ndarray[object] values):
277+
def datetime_to_datetime64(ndarray values):
278+
# ndarray[object], but can't declare object without ndim
277279
"""
278280
Convert ndarray of datetime-like objects to int64 array representing
279281
nanosecond timestamps.
@@ -288,20 +290,27 @@ def datetime_to_datetime64(ndarray[object] values):
288290
inferred_tz : tzinfo or None
289291
"""
290292
cdef:
291-
Py_ssize_t i, n = len(values)
293+
Py_ssize_t i, n = values.size
292294
object val
293-
int64_t[:] iresult
295+
int64_t ival
296+
ndarray iresult # int64_t, but can't declare that without specifying ndim
294297
npy_datetimestruct dts
295298
_TSObject _ts
296299
bint found_naive = False
297300
tzinfo inferred_tz = None
298301

299-
result = np.empty(n, dtype='M8[ns]')
302+
cnp.broadcast mi
303+
304+
result = np.empty((<object>values).shape, dtype='M8[ns]')
300305
iresult = result.view('i8')
306+
307+
mi = cnp.PyArray_MultiIterNew2(iresult, values)
301308
for i in range(n):
302-
val = values[i]
309+
# Analogous to: val = values[i]
310+
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
311+
303312
if checknull_with_nat(val):
304-
iresult[i] = NPY_NAT
313+
ival = NPY_NAT
305314
elif PyDateTime_Check(val):
306315
if val.tzinfo is not None:
307316
if found_naive:
@@ -314,18 +323,23 @@ def datetime_to_datetime64(ndarray[object] values):
314323
inferred_tz = val.tzinfo
315324

316325
_ts = convert_datetime_to_tsobject(val, None)
317-
iresult[i] = _ts.value
326+
ival = _ts.value
318327
check_dts_bounds(&_ts.dts)
319328
else:
320329
found_naive = True
321330
if inferred_tz is not None:
322331
raise ValueError('Cannot mix tz-aware with '
323332
'tz-naive values')
324-
iresult[i] = pydatetime_to_dt64(val, &dts)
333+
ival = pydatetime_to_dt64(val, &dts)
325334
check_dts_bounds(&dts)
326335
else:
327336
raise TypeError(f'Unrecognized value type: {type(val)}')
328337

338+
# Analogous to: iresult[i] = ival
339+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
340+
341+
cnp.PyArray_MultiIter_NEXT(mi)
342+
329343
return result, inferred_tz
330344

331345

pandas/core/arrays/datetimes.py

+1 -2
Original file line numberDiff line numberDiff line change
@@ -2247,10 +2247,9 @@ def objects_to_datetime64ns(
22472247
result = result.reshape(data.shape, order=order)
22482248
except ValueError as err:
22492249
try:
2250-
values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
2250+
values, tz_parsed = conversion.datetime_to_datetime64(data)
22512251
# If tzaware, these values represent unix timestamps, so we
22522252
# return them as i8 to distinguish from wall times
2253-
values = values.reshape(data.shape, order=order)
22542253
return values.view("i8"), tz_parsed
22552254
except (ValueError, TypeError):
22562255
raise err

pandas/core/arrays/timedeltas.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -429,14 +429,15 @@ def _formatter(self, boxed: bool = False):
429429

430430
return get_format_timedelta64(self, box=True)
431431

432-
@dtl.ravel_compat
433432
def _format_native_types(
434433
self, *, na_rep="NaT", date_format=None, **kwargs
435434
) -> npt.NDArray[np.object_]:
436435
from pandas.io.formats.format import get_format_timedelta64
437436

438437
formatter = get_format_timedelta64(self._ndarray, na_rep)
439-
return np.array([formatter(x) for x in self._ndarray])
438+
# equiv: np.array([formatter(x) for x in self._ndarray])
439+
# but independent of dimension
440+
return np.frompyfunc(formatter, 1, 1)(self._ndarray)
440441

441442
# ----------------------------------------------------------------
442443
# Arithmetic Methods

0 commit comments

Comments
 (0)