Skip to content

BUG: empty strings raise in non-ISO8601 formats but parse as NaT elsewhere #50252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,7 @@ Datetimelike
- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`)
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing an empty string with a non-ISO8601 ``format``. Now, empty strings will be parsed as :class:`NaT`, for compatibility with how this is done for ISO8601 formats (:issue:`50251`)
- Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`)
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def array_strptime(
for i in range(n):
val = values[i]
if isinstance(val, str):
if val in nat_strings:
if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue
elif checknull_with_nat_and_na(val):
Expand Down
38 changes: 17 additions & 21 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,17 +2030,13 @@ def test_to_datetime_timezone_name(self):
assert result == expected

@td.skip_if_not_us_locale
def test_to_datetime_with_apply_with_empty_str(self, cache):
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
def test_to_datetime_with_apply_with_empty_str(self, cache, errors):
# this is only locale tested with US/None locales
# GH 5195
# GH 5195, GH50251
# with a format and coerce a single item to_datetime fails
td = Series(["May 04", "Jun 02", ""], index=[1, 2, 3])
msg = r"time data '' does not match format '%b %y' \(match\)"
with pytest.raises(ValueError, match=msg):
to_datetime(td, format="%b %y", errors="raise", cache=cache)
with pytest.raises(ValueError, match=msg):
td.apply(to_datetime, format="%b %y", errors="raise", cache=cache)
expected = to_datetime(td, format="%b %y", errors="coerce", cache=cache)
expected = to_datetime(td, format="%b %y", errors=errors, cache=cache)

result = td.apply(
lambda x: to_datetime(x, format="%b %y", errors="coerce", cache=cache)
Expand Down Expand Up @@ -2987,24 +2983,24 @@ def test_na_to_datetime(nulls_fixture, klass):
assert result[0] is NaT


def test_empty_string_datetime_coerce_format():
# GH13044
td = Series(["03/24/2016", "03/25/2016", ""])
format = "%m/%d/%Y"
@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"])
@pytest.mark.parametrize(
"args, format",
[
(["03/24/2016", "03/25/2016", ""], "%m/%d/%Y"),
(["2016-03-24", "2016-03-25", ""], "%Y-%m-%d"),
],
ids=["non-ISO8601", "ISO8601"],
)
def test_empty_string_datetime(errors, args, format):
# GH13044, GH50251
td = Series(args)

# coerce empty string to pd.NaT
result = to_datetime(td, format=format, errors="coerce")
result = to_datetime(td, format=format, errors=errors)
expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]")
tm.assert_series_equal(expected, result)

# raise an exception in case a format is given
with pytest.raises(ValueError, match="does not match format"):
to_datetime(td, format=format, errors="raise")

# still raise an exception in case no format is given
with pytest.raises(ValueError, match="does not match format"):
to_datetime(td, errors="raise")


def test_empty_string_datetime_coerce__unit():
# GH13044
Expand Down