Skip to content

Fix/empty string datetimelike conversion/issue 36550 #36834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

16 changes: 14 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,16 @@ cpdef array_to_datetime(
# string
seen_string = True

if len(val) == 0 or val in nat_strings:
if len(val) == 0:
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_ignore:
raise TypeError("Empty string is not a valid datetime")
else:
raise ValueError("Empty string is not a valid datetime")

if val in nat_strings:
iresult[i] = NPY_NAT
continue

Expand Down Expand Up @@ -710,7 +719,10 @@ cdef array_to_datetime_object(
# GH 25978. No need to parse NaT-like or datetime-like vals
oresult[i] = val
elif isinstance(val, str):
if len(val) == 0 or val in nat_strings:
if len(val) == 0:
oresult[i] = val
continue
if val in nat_strings:
oresult[i] = 'NaT'
continue
try:
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,9 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
int out_local = 0, out_tzoffset = 0
bint do_parse_datetime_string = False

if len(ts) == 0 or ts in nat_strings:
if len(ts) == 0:
raise ValueError("Empty string is not a valid timestamp")
elif ts in nat_strings:
ts = NaT
elif ts == 'now':
# Issue 9000, we short-circuit rather than going
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,9 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
# have_value : track if we have at least 1 leading unit
# have_hhmmss : tracks if we have a regular format hh:mm:ss

if len(ts) == 0 or ts in nat_strings:
if len(ts) == 0:
raise ValueError("Empty string is not a valid timedelta")
if ts in nat_strings:
return NPY_NAT

for c in ts:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2182,7 +2182,7 @@ def isna(self):
For datetimes, `NaT` (Not a Time) is considered as an NA value.

>>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
... pd.Timestamp(''), None, pd.NaT])
... pd.Timestamp('NaT'), None, pd.NaT])
>>> idx
DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
dtype='datetime64[ns]', freq=None)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def predictions(tool):
{
"Key": ["B", "B", "A", "A"],
"State": ["step1", "step2", "step1", "step2"],
"oTime": ["", "2016-09-19 05:24:33", "", "2016-09-19 23:59:04"],
"oTime": ["NaT", "2016-09-19 05:24:33", "NaT", "2016-09-19 23:59:04"],
"Machine": ["23", "36L", "36R", "36R"],
}
)
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,9 +1129,7 @@ def test_parse_dates_empty_string(all_parsers):
data = "Date,test\n2012-01-01,1\n,2"
result = parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False)

expected = DataFrame(
[[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"]
)
expected = DataFrame([[datetime(2012, 1, 1), 1], ["", 2]], columns=["Date", "test"])
tm.assert_frame_equal(result, expected)


Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/scalar/test_nat.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,13 @@ def test_identity(klass, value):
@pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period])
@pytest.mark.parametrize("value", ["", "nat", "NAT", None, np.nan])
def test_equality(klass, value):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would instead remove the empty-string value from this parametrization and cover it in a new test for that case (asserting the error).

These duplicate the constructor tests, so we should keep one or the other (I think they are fine here).

if klass is Period and value == "":
pytest.skip("Period cannot parse empty string")
if value == "":
if klass is Period and value == "":
pytest.skip("Period cannot parse empty string")
elif klass is Timedelta:
pytest.skip("Timedelta cannot parse empty string")
elif klass is Timestamp:
pytest.skip("Timestamp cannot parse empty string")

assert klass(value).value == iNaT

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ def test_construction():
Timedelta("foo bar")


def test_construction_empty_string():
    """Check that ``Timedelta("")`` raises ``ValueError``."""
    # Issue #36550, empty string
    # Pin the message so an unrelated ValueError cannot satisfy the test.
    msg = "Empty string is not a valid timedelta"
    with pytest.raises(ValueError, match=msg):
        Timedelta("")


@pytest.mark.parametrize(
"item",
list(
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,10 @@ def test_constructor_fromisocalendar(self):
assert result == expected_stdlib
assert isinstance(result, Timestamp)

def test_constructior_empty_string(self):
    """Check that ``Timestamp("")`` raises ``ValueError``."""
    # Issue #36550, empty string
    # Pin the message so an unrelated ValueError cannot satisfy the test.
    msg = "Empty string is not a valid timestamp"
    with pytest.raises(ValueError, match=msg):
        Timestamp("")


def test_constructor_ambigous_dst():
# GH 24329
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,21 @@ def test_isin_empty(self, empty):

result = s.isin(empty)
tm.assert_series_equal(expected, result)

@pytest.mark.parametrize(
    "values, in_list, expected",
    [
        ([""], ["", pd.Timedelta(0)], [True]),
        (["", pd.Timedelta(0)], [""], [True, False]),
        ([""], ["", pd.to_datetime("2020-01-01")], [True]),
        (["", pd.to_datetime("2020-01-01")], [""], [True, False]),
    ],
)
def test_empty_string_category(self, values, in_list, expected):
    """Check ``Series.isin`` when "" is mixed with datetimelike values."""
    # Issue #36550
    # Mixed empty string with datetimelike
    ser = pd.Series(values)
    result = ser.isin(in_list)
    # Use the ``tm`` helper, matching the other tests in this module.
    tm.assert_series_equal(result, pd.Series(expected))
18 changes: 14 additions & 4 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,16 @@ def test_to_datetime_fixed_offset(self):
result = to_datetime(dates)
assert result.tz == fixed_off

def test_to_datetime_empty_string(self):
    """Exercise ``to_datetime("")`` under each ``errors`` mode."""
    # errors="raise": the empty string is rejected.
    with pytest.raises(ValueError):
        pd.to_datetime("", errors="raise")

    # errors="ignore": the input is handed back unchanged.
    ignored = pd.to_datetime("", errors="ignore")
    assert ignored == ""

    # errors="coerce": the empty string becomes NaT.
    coerced = pd.to_datetime("", errors="coerce")
    assert coerced is pd.NaT


class TestToDatetimeUnit:
@pytest.mark.parametrize("cache", [True, False])
Expand Down Expand Up @@ -1574,11 +1584,11 @@ def test_to_datetime_with_apply(self, cache):
def test_to_datetime_types(self, cache):

# empty string
result = to_datetime("", cache=cache)
assert result is NaT
with pytest.raises(ValueError):
result = to_datetime("", cache=cache)

result = to_datetime(["", ""], cache=cache)
assert isna(result).all()
with pytest.raises(ValueError):
result = to_datetime(["", ""], cache=cache)

# ints
result = Timestamp(0)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
import pytest

import pandas as pd
from pandas import Series, TimedeltaIndex, isna, to_timedelta
from pandas import Series, TimedeltaIndex, to_timedelta
import pandas._testing as tm


class TestTimedeltas:
def test_to_timedelta(self):

result = to_timedelta(["", ""])
assert isna(result).all()
with pytest.raises(ValueError):
to_timedelta(["", ""])

# pass thru
result = to_timedelta(np.array([np.timedelta64(1, "s")]))
Expand Down