diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index d327ca6256015..8eaf86b3d193f 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -6,6 +6,7 @@ import cython
 from cpython.object cimport (
     Py_EQ,
     Py_NE,
+    PyObject,
     PyObject_RichCompare,
 )
 
@@ -312,8 +313,9 @@ cdef convert_to_timedelta64(object ts, str unit):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def array_to_timedelta64(
-    ndarray[object] values, str unit=None, str errors="raise"
+    ndarray values, str unit=None, str errors="raise"
 ) -> ndarray:
+    # values is object-dtype, may be 2D
     """
     Convert an ndarray to an array of timedeltas. If errors == 'coerce',
     coerce non-convertible objects to NaT. Otherwise, raise.
@@ -324,19 +326,28 @@ def array_to_timedelta64(
     """
 
     cdef:
-        Py_ssize_t i, n
-        int64_t[:] iresult
+        Py_ssize_t i, n = values.size
+        ndarray result = np.empty((<object>values).shape, dtype="m8[ns]")
+        object item
+        int64_t ival
+        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
+        cnp.flatiter it
+
+    if values.descr.type_num != cnp.NPY_OBJECT:
+        # raise here otherwise we segfault below
+        raise TypeError("array_to_timedelta64 'values' must have object dtype")
 
     if errors not in {'ignore', 'raise', 'coerce'}:
         raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")
 
-    n = values.shape[0]
-    result = np.empty(n, dtype='m8[ns]')
-    iresult = result.view('i8')
-
-    if unit is not None:
+    if unit is not None and errors != "coerce":
+        it = cnp.PyArray_IterNew(values)
         for i in range(n):
-            if isinstance(values[i], str) and errors != "coerce":
+            # Analogous to: item = values[i]
+            item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it))
+            # advance now; otherwise every pass re-reads the first element
+            cnp.PyArray_ITER_NEXT(it)
+            if isinstance(item, str):
                 raise ValueError(
                     "unit must not be specified if the input contains a str"
                 )
@@ -346,28 +357,59 @@ def array_to_timedelta64(
     # this is where all of the error handling will take place.
     try:
         for i in range(n):
-            if values[i] is NaT:
-                # we allow this check in the fast-path because NaT is a C-object
-                # so this is an inexpensive check
-                iresult[i] = NPY_NAT
-            else:
-                result[i] = parse_timedelta_string(values[i])
+            # Analogous to: item = values[i]
+            item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
+            ival = _item_to_timedelta64_fastpath(item)
+
+            # Analogous to: iresult[i] = ival
+            (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
+
+            cnp.PyArray_MultiIter_NEXT(mi)
+
     except (TypeError, ValueError):
+        cnp.PyArray_MultiIter_RESET(mi)
+
         parsed_unit = parse_timedelta_unit(unit or 'ns')
         for i in range(n):
-            try:
-                result[i] = convert_to_timedelta64(values[i], parsed_unit)
-            except ValueError as err:
-                if errors == 'coerce':
-                    result[i] = NPY_NAT
-                elif "unit abbreviation w/o a number" in str(err):
-                    # re-raise with more pertinent message
-                    msg = f"Could not convert '{values[i]}' to NumPy timedelta"
-                    raise ValueError(msg) from err
-                else:
-                    raise
+            item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
+
+            ival = _item_to_timedelta64(item, parsed_unit, errors)
 
-    return iresult.base  # .base to access underlying np.ndarray
+            (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
+
+            cnp.PyArray_MultiIter_NEXT(mi)
+
+    return result
+
+
+cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1:
+    """
+    Fast-path conversion used by array_to_timedelta64 (NaT or str items).
+    """
+    if item is NaT:
+        # we allow this check in the fast-path because NaT is a C-object
+        # so this is an inexpensive check
+        return NPY_NAT
+    else:
+        return parse_timedelta_string(item)
+
+
+cdef inline int64_t _item_to_timedelta64(object item, str parsed_unit, str errors) except? -1:
+    """
+    Fallback conversion used by array_to_timedelta64; NaT if errors="coerce".
+    """
+    try:
+        return get_timedelta64_value(convert_to_timedelta64(item, parsed_unit))
+    except ValueError as err:
+        if errors == "coerce":
+            return NPY_NAT
+        elif "unit abbreviation w/o a number" in str(err):
+            # re-raise with more pertinent message
+            msg = f"Could not convert '{item}' to NumPy timedelta"
+            raise ValueError(msg) from err
+        else:
+            raise
 
 
 cdef inline int64_t parse_timedelta_string(str ts) except? -1:
diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py
index a25f148131ea0..a0ec563d1f48f 100644
--- a/pandas/tests/tslibs/test_timedeltas.py
+++ b/pandas/tests/tslibs/test_timedeltas.py
@@ -3,7 +3,10 @@
 import numpy as np
 import pytest
 
-from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
+from pandas._libs.tslibs.timedeltas import (
+    array_to_timedelta64,
+    delta_to_nanoseconds,
+)
 
 from pandas import (
     Timedelta,
@@ -63,3 +66,26 @@ def test_kwarg_assertion(kwargs):
 
     with pytest.raises(ValueError, match=re.escape(err_message)):
         Timedelta(**kwargs)
+
+
+class TestArrayToTimedelta64:
+    def test_array_to_timedelta64_string_with_unit_2d_raises(self):
+        # check the 'unit is not None and errors != "coerce"' path
+        # in array_to_timedelta64 raises correctly with 2D values
+        values = np.array([["1", 2], [3, "4"]], dtype=object)
+        with pytest.raises(ValueError, match="unit must not be specified"):
+            array_to_timedelta64(values, unit="s")
+
+    def test_array_to_timedelta64_string_not_first_with_unit_raises(self):
+        # a str that is not the first element must still be detected
+        values = np.array([[1, 2], [3, "4"]], dtype=object)
+        with pytest.raises(ValueError, match="unit must not be specified"):
+            array_to_timedelta64(values, unit="s")
+
+    def test_array_to_timedelta64_non_object_raises(self):
+        # check we raise, not segfault
+        values = np.arange(5)
+
+        msg = "'values' must have object dtype"
+        with pytest.raises(TypeError, match=msg):
+            array_to_timedelta64(values)