From 716d32b845910df9c2f595f61b5ad9e383003b10 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 14 Dec 2022 08:04:36 +0000 Subject: [PATCH 1/3] empty strings -> nat --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/tests/tools/test_to_datetime.py | 27 ++++++++------------------ 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9d5c9c67224a7..fbf4f73143304 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -776,6 +776,7 @@ Datetimelike - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`) - Bug in :class:`Timestamp` was showing ``UserWarning`` which was not actionable by users (:issue:`50232`) +- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing empty string and non-ISO8601 format was passed. 
Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`) - Timedelta diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 3736b21a85611..211c56e3ab3de 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -153,7 +153,7 @@ def array_strptime( for i in range(n): val = values[i] if isinstance(val, str): - if val in nat_strings: + if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue elif checknull_with_nat_and_na(val): diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 48844beed30f4..c7d985774cfee 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2019,17 +2019,13 @@ def test_to_datetime_timezone_name(self): assert result == expected @td.skip_if_not_us_locale - def test_to_datetime_with_apply_with_empty_str(self, cache): + @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) + def test_to_datetime_with_apply_with_empty_str(self, cache, errors): # this is only locale tested with US/None locales - # GH 5195 + # GH 5195, GH50251 # with a format and coerce a single item to_datetime fails td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3]) - msg = r"time data '' does not match format '%b %y' \(match\)" - with pytest.raises(ValueError, match=msg): - to_datetime(td, format="%b %y", errors="raise", cache=cache) - with pytest.raises(ValueError, match=msg): - td.apply(to_datetime, format="%b %y", errors="raise", cache=cache) - expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache) + expected = to_datetime(td, format="%b %y", errors=errors, cache=cache) result = td.apply( lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache) @@ -2976,24 +2972,17 @@ def test_na_to_datetime(nulls_fixture, klass): assert result[0] is NaT -def test_empty_string_datetime_coerce_format(): - # GH13044 
+@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) +def test_empty_string_datetime_coerce_format(errors): + # GH13044, GH50251 td = Series(["03/24/2016", "03/25/2016", ""]) format = "%m/%d/%Y" # coerce empty string to pd.NaT - result = to_datetime(td, format=format, errors="coerce") + result = to_datetime(td, format=format, errors=errors) expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]") tm.assert_series_equal(expected, result) - # raise an exception in case a format is given - with pytest.raises(ValueError, match="does not match format"): - to_datetime(td, format=format, errors="raise") - - # still raise an exception in case no format is given - with pytest.raises(ValueError, match="does not match format"): - to_datetime(td, errors="raise") - def test_empty_string_datetime_coerce__unit(): # GH13044 From 9296a3e4fb90e07c7aa6da3e86f1dc0b65fa27a4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 15 Dec 2022 08:11:56 +0000 Subject: [PATCH 2/3] :truck: rename func --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 32a135f022d0e..2523867fd2f74 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2984,7 +2984,7 @@ def test_na_to_datetime(nulls_fixture, klass): @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) -def test_empty_string_datetime_coerce_format(errors): +def test_empty_string_datetime(errors): # GH13044, GH50251 td = Series(["03/24/2016", "03/25/2016", ""]) format = "%m/%d/%Y" From bcaf6c006fc96594765507e60b72fd93f1793a29 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 15 Dec 2022 08:19:53 +0000 Subject: [PATCH 3/3] parametrise over iso and non-iso --- pandas/tests/tools/test_to_datetime.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git 
a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 2523867fd2f74..7e1a3e9129a12 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2984,10 +2984,17 @@ def test_na_to_datetime(nulls_fixture, klass): @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) -def test_empty_string_datetime(errors): +@pytest.mark.parametrize( + "args, format", + [ + (["03/24/2016", "03/25/2016", ""], "%m/%d/%Y"), + (["2016-03-24", "2016-03-25", ""], "%Y-%m-%d"), + ], + ids=["non-ISO8601", "ISO8601"], +) +def test_empty_string_datetime(errors, args, format): # GH13044, GH50251 - td = Series(["03/24/2016", "03/25/2016", ""]) - format = "%m/%d/%Y" + td = Series(args) # coerce empty string to pd.NaT result = to_datetime(td, format=format, errors=errors)