Skip to content

REF: avoid ravel in ints_to_pytimedelta #47261

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 34 additions & 15 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -161,42 +161,61 @@ def ints_to_pytimedelta(ndarray m8values, box=False):
array of Timedelta or timedeltas objects
"""
cdef:
NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype)
Py_ssize_t i, n = m8values.size
int64_t value
object[::1] result = np.empty(n, dtype=object)
NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype)
object res_val

# Note that `result` (and thus `result_flat`) is C-order and
# `it` iterates C-order as well, so the iteration matches
# See discussion at
# github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
ndarray result = cnp.PyArray_EMPTY(m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0)
object[::1] res_flat = result.ravel() # should NOT be a copy

arr = m8values.view("i8")
ndarray arr = m8values.view("i8")
cnp.flatiter it = cnp.PyArray_IterNew(arr)

for i in range(n):
# Analogous to: value = arr[i]
value = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]

value = arr[i]
if value == NPY_NAT:
result[i] = <object>NaT
res_val = <object>NaT
else:
if box:
result[i] = _timedelta_from_value_and_reso(value, reso=reso)
res_val = _timedelta_from_value_and_reso(value, reso=reso)
elif reso == NPY_DATETIMEUNIT.NPY_FR_ns:
result[i] = timedelta(microseconds=int(value) / 1000)
res_val = timedelta(microseconds=int(value) / 1000)
elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
result[i] = timedelta(microseconds=value)
res_val = timedelta(microseconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
result[i] = timedelta(milliseconds=value)
res_val = timedelta(milliseconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
result[i] = timedelta(seconds=value)
res_val = timedelta(seconds=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
result[i] = timedelta(minutes=value)
res_val = timedelta(minutes=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
result[i] = timedelta(hours=value)
res_val = timedelta(hours=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
result[i] = timedelta(days=value)
res_val = timedelta(days=value)
elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
result[i] = timedelta(weeks=value)
res_val = timedelta(weeks=value)
else:
# Month, Year, NPY_FR_GENERIC, pico, fempto, atto
raise NotImplementedError(reso)

return result.base # .base to access underlying np.ndarray
# Note: we can index result directly instead of using PyArray_MultiIter_DATA
# like we do for the other functions because result is known C-contiguous
# and is the first argument to PyArray_MultiIterNew2. The usual pattern
# does not seem to work with object dtype.
# See discussion at
# github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
res_flat[i] = res_val

cnp.PyArray_ITER_NEXT(it)

return result


# ----------------------------------------------------------------------
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,7 @@ def astype(self, dtype, copy: bool = True):
return converted.reshape(self.shape)

elif self.dtype.kind == "m":
i8data = self.asi8.ravel()
converted = ints_to_pytimedelta(self._ndarray.ravel(), box=True)
return converted.reshape(self.shape)
return ints_to_pytimedelta(self._ndarray, box=True)

return self._box_values(self.asi8.ravel()).reshape(self.shape)

Expand Down