Skip to content

PERF: avoid potentially-copying ravel #46733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Apr 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@ import cython
import numpy as np

cimport numpy as cnp
from cpython.object cimport PyObject
from numpy cimport (
int32_t,
int64_t,
Expand Down Expand Up @@ -273,7 +274,8 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):

@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(ndarray[object] values):
def datetime_to_datetime64(ndarray values):
# ndarray[object], but can't declare object without ndim
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.
Expand All @@ -288,20 +290,27 @@ def datetime_to_datetime64(ndarray[object] values):
inferred_tz : tzinfo or None
"""
cdef:
Py_ssize_t i, n = len(values)
Py_ssize_t i, n = values.size
object val
int64_t[:] iresult
int64_t ival
ndarray iresult # int64_t, but can't declare that without specifying ndim
npy_datetimestruct dts
_TSObject _ts
bint found_naive = False
tzinfo inferred_tz = None

result = np.empty(n, dtype='M8[ns]')
cnp.broadcast mi

result = np.empty((<object>values).shape, dtype='M8[ns]')
iresult = result.view('i8')

mi = cnp.PyArray_MultiIterNew2(iresult, values)
for i in range(n):
val = values[i]
# Analogous to: val = values[i]
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

if checknull_with_nat(val):
iresult[i] = NPY_NAT
ival = NPY_NAT
elif PyDateTime_Check(val):
if val.tzinfo is not None:
if found_naive:
Expand All @@ -314,18 +323,23 @@ def datetime_to_datetime64(ndarray[object] values):
inferred_tz = val.tzinfo

_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
ival = _ts.value
check_dts_bounds(&_ts.dts)
else:
found_naive = True
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
iresult[i] = pydatetime_to_dt64(val, &dts)
ival = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
else:
raise TypeError(f'Unrecognized value type: {type(val)}')

# Analogous to: iresult[i] = ival
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival

cnp.PyArray_MultiIter_NEXT(mi)

return result, inferred_tz


Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/datetimes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2247,10 +2247,9 @@ def objects_to_datetime64ns(
result = result.reshape(data.shape, order=order)
except ValueError as err:
try:
values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
values, tz_parsed = conversion.datetime_to_datetime64(data)
# If tzaware, these values represent unix timestamps, so we
# return them as i8 to distinguish from wall times
values = values.reshape(data.shape, order=order)
return values.view("i8"), tz_parsed
except (ValueError, TypeError):
raise err
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -429,14 +429,15 @@ def _formatter(self, boxed: bool = False):

return get_format_timedelta64(self, box=True)

@dtl.ravel_compat
def _format_native_types(
self, *, na_rep="NaT", date_format=None, **kwargs
) -> npt.NDArray[np.object_]:
from pandas.io.formats.format import get_format_timedelta64

formatter = get_format_timedelta64(self._ndarray, na_rep)
return np.array([formatter(x) for x in self._ndarray])
# equiv: np.array([formatter(x) for x in self._ndarray])
# but independent of dimension
return np.frompyfunc(formatter, 1, 1)(self._ndarray)

# ----------------------------------------------------------------
# Arithmetic Methods
Expand Down