diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 763530520cd29..51bb21404e7b5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -963,10 +963,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: datetime format string (for `strftime` or `strptime`), or None if it can't be guessed. """ - - if not isinstance(dt_str, str): - return None - day_attribute_and_format = (('day',), '%d', 2) # attr name, format, padding (if any) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index f9497860c00ba..fe14f8e9907d6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -125,11 +125,14 @@ class FulldatetimeDict(YearMonthDayDict, total=False): # --------------------------------------------------------------------- -def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False): - # Try to guess the format based on the first non-NaN element +def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None: + # Try to guess the format based on the first non-NaN element, return None if can't non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): - return guess_datetime_format(arr[non_nan_elements[0]], dayfirst=dayfirst) + if type(first_non_nan_element := arr[non_nan_elements[0]]) is str: + # GH#32264 np.str_ object + return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst) + return None def should_cache( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5c6b4c2434b88..3d59e115d4cf9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -468,8 +468,10 @@ def test_to_datetime_mixed_datetime_and_string(self): expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60)) tm.assert_index_equal(res, expected) - def test_to_datetime_np_str(self): + @pytest.mark.parametrize("infer_datetime_format", [True, False]) + def test_to_datetime_np_str(self, infer_datetime_format): # GH#32264 + # GH#48969 value = np.str_("2019-02-04 10:18:46.297000+0000") ser = Series([value]) @@ -479,11 +481,11 @@ def test_to_datetime_np_str(self): assert to_datetime(value) == exp assert to_datetime(ser.iloc[0]) == exp - res = to_datetime([value]) + res = to_datetime([value], infer_datetime_format=infer_datetime_format) expected = Index([exp]) tm.assert_index_equal(res, expected) - res = to_datetime(ser) + res = to_datetime(ser, infer_datetime_format=infer_datetime_format) expected = Series(expected) tm.assert_series_equal(res, expected)