From 20e196027a7af60caa08e0d92f38cb63cc4c597e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 6 Oct 2022 12:26:33 +0100 Subject: [PATCH 1/2] BUG: to_datetime(..., infer_datetime_format=True) fails with np.str_ input --- pandas/_libs/tslibs/parsing.pyx | 4 ---- pandas/core/tools/datetimes.py | 8 +++++--- pandas/tests/tools/test_to_datetime.py | 8 +++++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 763530520cd29..51bb21404e7b5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -963,10 +963,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: datetime format string (for `strftime` or `strptime`), or None if it can't be guessed. """ - - if not isinstance(dt_str, str): - return None - day_attribute_and_format = (('day',), '%d', 2) # attr name, format, padding (if any) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index f9497860c00ba..204a2a0164c76 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -125,11 +125,13 @@ class FulldatetimeDict(YearMonthDayDict, total=False): # --------------------------------------------------------------------- -def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False): - # Try to guess the format based on the first non-NaN element +def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None: + # Try to guess the format based on the first non-NaN element, return None if can't non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): - return guess_datetime_format(arr[non_nan_elements[0]], dayfirst=dayfirst) + if type(first_non_nan_element := arr[non_nan_elements[0]]) is str: + # GH#32264 np.str_ object + return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst) def should_cache( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5c6b4c2434b88..3d59e115d4cf9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -468,8 +468,10 @@ def test_to_datetime_mixed_datetime_and_string(self): expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60)) tm.assert_index_equal(res, expected) - def test_to_datetime_np_str(self): + @pytest.mark.parametrize("infer_datetime_format", [True, False]) + def test_to_datetime_np_str(self, infer_datetime_format): # GH#32264 + # GH#48969 value = np.str_("2019-02-04 10:18:46.297000+0000") ser = Series([value]) @@ -479,11 +481,11 @@ def test_to_datetime_np_str(self): assert to_datetime(value) == exp assert to_datetime(ser.iloc[0]) == exp - res = to_datetime([value]) + res = to_datetime([value], infer_datetime_format=infer_datetime_format) expected = Index([exp]) tm.assert_index_equal(res, expected) - res = to_datetime(ser) + res = to_datetime(ser, infer_datetime_format=infer_datetime_format) expected = Series(expected) tm.assert_series_equal(res, expected) From 8dcb4fd74bc222b49ad2d8a2fc34cad69ac4c025 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 6 Oct 2022 13:09:04 +0100 Subject: [PATCH 2/2] add return --- pandas/core/tools/datetimes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 204a2a0164c76..fe14f8e9907d6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -132,6 +132,7 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str if type(first_non_nan_element := arr[non_nan_elements[0]]) is str: # GH#32264 np.str_ object return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst) + return None def should_cache(