Skip to content

Commit ca6c0ce

Browse files
jbrockmendel authored and
phofl committed
REF: array_to_timedelta64 handle 2D (pandas-dev#45788)
1 parent 6b4750c commit ca6c0ce

File tree

2 files changed

+88
-28
lines changed

2 files changed

+88
-28
lines changed

pandas/_libs/tslibs/timedeltas.pyx

+67-27
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import cython
66
from cpython.object cimport (
77
Py_EQ,
88
Py_NE,
9+
PyObject,
910
PyObject_RichCompare,
1011
)
1112

@@ -312,8 +313,9 @@ cdef convert_to_timedelta64(object ts, str unit):
312313
@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(
    ndarray values, str unit=None, str errors="raise"
) -> ndarray:
    """
    Convert an ndarray to an array of timedeltas. If errors == 'coerce',
    coerce non-convertible objects to NaT. Otherwise, raise.

    Parameters
    ----------
    values : ndarray
        Must have object dtype; may be 2D.
    unit : str, optional
        Unit used for the slow-path conversion; 'ns' is used when None.
        May not be combined with str elements unless errors == 'coerce'.
    errors : {'raise', 'ignore', 'coerce'}, default 'raise'
        With 'coerce', elements that fail conversion with a ValueError
        become NaT. With any other value the exception propagates.

    Returns
    -------
    ndarray
        dtype "m8[ns]", same shape as ``values``.

    Raises
    ------
    TypeError
        If ``values`` does not have object dtype.
    ValueError
        If ``errors`` is not one of the accepted options, if ``unit`` is
        given while ``values`` contains a str (and errors != 'coerce'),
        or if an element cannot be converted (and errors != 'coerce').
    """

    cdef:
        Py_ssize_t i, n = values.size
        ndarray result = np.empty((<object>values).shape, dtype="m8[ns]")
        object item
        int64_t ival
        # Walks result (index 0) and values (index 1) in lockstep, which is
        # what lets this function handle inputs of any dimensionality.
        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
        cnp.flatiter it

    if values.descr.type_num != cnp.NPY_OBJECT:
        # raise here otherwise we segfault below
        raise TypeError("array_to_timedelta64 'values' must have object dtype")

    if errors not in {'ignore', 'raise', 'coerce'}:
        raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")

    if unit is not None and errors != "coerce":
        it = cnp.PyArray_IterNew(values)
        for i in range(n):
            # Analogous to: item = values[i]
            item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it))
            if isinstance(item, str):
                raise ValueError(
                    "unit must not be specified if the input contains a str"
                )
            # Advance the flat iterator; without this every pass would
            # re-inspect the first element and miss a str located anywhere
            # else in the array.
            cnp.PyArray_ITER_NEXT(it)

    # Fast path first: each element is either NaT or handed straight to the
    # string parser. If any element fails with TypeError/ValueError we start
    # over on the general path below, and
    # this is where all of the error handling will take place.
    try:
        for i in range(n):
            # Analogous to: item = values[i]
            item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

            ival = _item_to_timedelta64_fastpath(item)

            # Analogous to: iresult[i] = ival
            (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival

            cnp.PyArray_MultiIter_NEXT(mi)

    except (TypeError, ValueError):
        # Rewind to the first element; every entry is recomputed on the
        # general path, overwriting anything the fast path already wrote.
        cnp.PyArray_MultiIter_RESET(mi)

        parsed_unit = parse_timedelta_unit(unit or 'ns')
        for i in range(n):
            # Analogous to: item = values[i]
            item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

            ival = _item_to_timedelta64(item, parsed_unit, errors)

            # Analogous to: iresult[i] = ival
            (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival

            cnp.PyArray_MultiIter_NEXT(mi)

    return result
382+
383+
384+
cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1:
    """
    Fast-path conversion of one element; see array_to_timedelta64.

    NaT maps to NPY_NAT; any other item is passed to parse_timedelta_string.
    """
    # The identity check against NaT is permitted on the fast path because
    # NaT is a C-object, which keeps the check inexpensive.
    if item is not NaT:
        return parse_timedelta_string(item)
    return NPY_NAT
394+
395+
396+
cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1:
    """
    General-path conversion of one element; see array_to_timedelta64.

    Converts ``item`` with convert_to_timedelta64 and returns its integer
    value. On ValueError: returns NPY_NAT when errors == "coerce",
    otherwise re-raises (with a clearer message for the
    "unit abbreviation w/o a number" case).
    """
    try:
        td64 = convert_to_timedelta64(item, parsed_unit)
        return get_timedelta64_value(td64)
    except ValueError as err:
        if errors == "coerce":
            return NPY_NAT
        if "unit abbreviation w/o a number" not in str(err):
            raise
        # re-raise with more pertinent message
        msg = f"Could not convert '{item}' to NumPy timedelta"
        raise ValueError(msg) from err
371411

372412

373413
cdef inline int64_t parse_timedelta_string(str ts) except? -1:

pandas/tests/tslibs/test_timedeltas.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
6+
from pandas._libs.tslibs.timedeltas import (
7+
array_to_timedelta64,
8+
delta_to_nanoseconds,
9+
)
710

811
from pandas import (
912
Timedelta,
@@ -63,3 +66,20 @@ def test_kwarg_assertion(kwargs):
6366

6467
with pytest.raises(ValueError, match=re.escape(err_message)):
6568
Timedelta(**kwargs)
69+
70+
71+
class TestArrayToTimedelta64:
    """Direct tests of the low-level array_to_timedelta64 routine."""

    def test_array_to_timedelta64_string_with_unit_2d_raises(self):
        # A 2D object array that contains strings, combined with an explicit
        # unit, must hit the 'unit is not None and errors != "coerce"' check
        # in array_to_timedelta64 and raise.
        mixed = np.array([["1", 2], [3, "4"]], dtype=object)
        expected = "unit must not be specified"
        with pytest.raises(ValueError, match=expected):
            array_to_timedelta64(mixed, unit="s")

    def test_array_to_timedelta64_non_object_raises(self):
        # A non-object dtype must raise a TypeError, not segfault.
        int_values = np.arange(5)
        with pytest.raises(TypeError, match="'values' must have object dtype"):
            array_to_timedelta64(int_values)

0 commit comments

Comments
 (0)