diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2cbc7b06b89df..f8283bd61aee7 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -118,7 +118,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ -- +- Bug in :func:`to_datetime` with sequences of ``np.str_`` objects incorrectly raising (:issue:`32264`) - Timedelta diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2883c910b3833..5c852e85efdc0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -506,6 +506,9 @@ cpdef array_to_datetime( elif isinstance(val, str): # string seen_string = True + if type(val) is not str: + # GH#32264 np.str_ object + val = str(val) if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT @@ -735,6 +738,10 @@ cdef _array_to_datetime_object( # GH 25978. No need to parse NaT-like or datetime-like vals oresult[i] = val elif isinstance(val, str): + if type(val) is not str: + # GH#32264 np.str_ objects + val = str(val) + if len(val) == 0 or val in nat_strings: oresult[i] = 'NaT' continue diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f2b480642e083..559c56992f71e 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -239,6 +239,9 @@ cdef inline bint does_string_look_like_time(str parse_string): def parse_datetime_string( + # NB: This will break with np.str_ (GH#32264) even though + # isinstance(npstrobj, str) evaluates to True, so caller must ensure + # the argument is *exactly* 'str' str date_string, bint dayfirst=False, bint yearfirst=False, @@ -254,7 +257,7 @@ def parse_datetime_string( """ cdef: - object dt + datetime dt if not _does_string_look_like_datetime(date_string): raise ValueError('Given date string not likely a datetime.') diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5d5e01084345d..07b1277b6f31b 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -455,6 +455,25 @@ def test_to_datetime_parse_timezone_keeps_name(self): class TestToDatetime: + def test_to_datetime_np_str(self): + # GH#32264 + value = np.str_("2019-02-04 10:18:46.297000+0000") + + ser = Series([value]) + + exp = Timestamp("2019-02-04 10:18:46.297000", tz="UTC") + + assert to_datetime(value) == exp + assert to_datetime(ser.iloc[0]) == exp + + res = to_datetime([value]) + expected = Index([exp]) + tm.assert_index_equal(res, expected) + + res = to_datetime(ser) + expected = Series(expected) + tm.assert_series_equal(res, expected) + @pytest.mark.parametrize( "s, _format, dt", [