Skip to content

Commit e0cb0b3

Browse files
authored
BUG: empty strings raise in non-ISO8601 formats but parse as NaT elsewhere (#50252)
1 parent 026a83e commit e0cb0b3

File tree

3 files changed

+19
-22
lines changed

3 files changed

+19
-22
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ Datetimelike
784784
- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
785785
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
786786
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
787+
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing an empty string and a non-ISO8601 format was passed. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`)
787788
- Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`)
788789
-
789790

pandas/_libs/tslibs/strptime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def array_strptime(
157157
for i in range(n):
158158
val = values[i]
159159
if isinstance(val, str):
160-
if val in nat_strings:
160+
if len(val) == 0 or val in nat_strings:
161161
iresult[i] = NPY_NAT
162162
continue
163163
elif checknull_with_nat_and_na(val):

pandas/tests/tools/test_to_datetime.py

+17-21
Original file line numberDiff line numberDiff line change
@@ -2030,17 +2030,13 @@ def test_to_datetime_timezone_name(self):
20302030
assert result == expected
20312031

20322032
@td.skip_if_not_us_locale
2033-
def test_to_datetime_with_apply_with_empty_str(self, cache):
2033+
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
2034+
def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
20342035
# this is only locale tested with US/None locales
2035-
# GH 5195
2036+
# GH 5195, GH50251
20362037
# with a format and coerce a single item to_datetime fails
20372038
td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
2038-
msg = r"time data '' does not match format '%b %y' \(match\)"
2039-
with pytest.raises(ValueError, match=msg):
2040-
to_datetime(td, format="%b %y", errors="raise", cache=cache)
2041-
with pytest.raises(ValueError, match=msg):
2042-
td.apply(to_datetime, format="%b %y", errors="raise", cache=cache)
2043-
expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache)
2039+
expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)
20442040

20452041
result = td.apply(
20462042
lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
@@ -2987,24 +2983,24 @@ def test_na_to_datetime(nulls_fixture, klass):
29872983
assert result[0] is NaT
29882984

29892985

2990-
def test_empty_string_datetime_coerce_format():
2991-
# GH13044
2992-
td = Series(["03/24/2016", "03/25/2016", ""])
2993-
format = "%m/%d/%Y"
2986+
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
2987+
@pytest.mark.parametrize(
2988+
"args, format",
2989+
[
2990+
(["03/24/2016", "03/25/2016", ""], "%m/%d/%Y"),
2991+
(["2016-03-24", "2016-03-25", ""], "%Y-%m-%d"),
2992+
],
2993+
ids=["non-ISO8601", "ISO8601"],
2994+
)
2995+
def test_empty_string_datetime(errors, args, format):
2996+
# GH13044, GH50251
2997+
td = Series(args)
29942998

29952999
# coerce empty string to pd.NaT
2996-
result = to_datetime(td, format=format, errors="coerce")
3000+
result = to_datetime(td, format=format, errors=errors)
29973001
expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
29983002
tm.assert_series_equal(expected, result)
29993003

3000-
# raise an exception in case a format is given
3001-
with pytest.raises(ValueError, match="does not match format"):
3002-
to_datetime(td, format=format, errors="raise")
3003-
3004-
# still raise an exception in case no format is given
3005-
with pytest.raises(ValueError, match="does not match format"):
3006-
to_datetime(td, errors="raise")
3007-
30083004

30093005
def test_empty_string_datetime_coerce__unit():
30103006
# GH13044

0 commit comments

Comments
 (0)