Skip to content

Commit 5d7bd5a

Browse files
jbrockmendel authored and pooja-subramaniam committed
REF: consolidate datetime parsing paths and exception handling (pandas-dev#50790)
* REF: fewer paths through datetime parsing code
* don't catch in conversion.pyx
* REF: move exception handling within dateutil_parse
1 parent 71498fe commit 5d7bd5a

File tree

11 files changed: +44 / -51 lines changed

11 files changed

+44
-51
lines changed

doc/source/user_guide/timeseries.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ The default behavior, ``errors='raise'``, is to raise when unparsable:
292292
.. code-block:: ipython
293293
294294
In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise')
295-
ValueError: Unknown string format
295+
ValueError: Unknown datetime string format
296296
297297
Pass ``errors='ignore'`` to return the original input when unparsable:
298298

pandas/_libs/tslibs/conversion.pyx

+3-10
Original file line number | Diff line number | Diff line change
@@ -544,16 +544,9 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
544544
maybe_localize_tso(obj, tz, obj.creso)
545545
return obj
546546

547-
try:
548-
dt = parse_datetime_string(
549-
ts, dayfirst=dayfirst, yearfirst=yearfirst
550-
)
551-
except ValueError as err:
552-
if "out of range for month" in str(err):
553-
# dateutil raised when constructing a datetime object,
554-
# let's give a nicer exception message
555-
raise ValueError("could not convert string to Timestamp") from err
556-
raise
547+
dt = parse_datetime_string(
548+
ts, dayfirst=dayfirst, yearfirst=yearfirst
549+
)
557550

558551
return convert_datetime_to_tsobject(dt, tz)
559552

pandas/_libs/tslibs/parsing.pyx

+26-27
Original file line number | Diff line number | Diff line change
@@ -286,19 +286,10 @@ def parse_datetime_string(
286286
except ValueError:
287287
pass
288288

289-
try:
290-
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
291-
dayfirst=dayfirst, yearfirst=yearfirst)
292-
except TypeError:
293-
# following may be raised from dateutil
294-
# TypeError: 'NoneType' object is not iterable
295-
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
296-
except OverflowError as err:
297-
# with e.g. "08335394550" dateutil raises when trying to pass
298-
# year=8335394550 to datetime.replace
299-
raise OutOfBoundsDatetime(
300-
f'Parsing "{date_string}" to datetime overflows'
301-
) from err
289+
dt, _ = dateutil_parse(date_string, default=_DEFAULT_DATETIME,
290+
dayfirst=dayfirst, yearfirst=yearfirst,
291+
ignoretz=False)
292+
302293
if dt.tzinfo is not None:
303294
# dateutil can return a datetime with a tzoffset outside of (-24H, 24H)
304295
# bounds, which is invalid (can be constructed, but raises if we call
@@ -415,15 +406,9 @@ def parse_datetime_string_with_reso(
415406
except ValueError:
416407
pass
417408

418-
try:
419-
parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
420-
dayfirst=dayfirst, yearfirst=yearfirst,
421-
ignoretz=False)
422-
except (ValueError, OverflowError) as err:
423-
# TODO: allow raise of errors within instead
424-
raise DateParseError(err)
425-
if parsed is None:
426-
raise DateParseError(f"Could not parse {date_string}")
409+
parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
410+
dayfirst=dayfirst, yearfirst=yearfirst,
411+
ignoretz=False)
427412
return parsed, reso
428413

429414

@@ -612,7 +597,7 @@ cpdef quarter_to_myear(int year, int quarter, str freq):
612597

613598
cdef dateutil_parse(
614599
str timestr,
615-
object default,
600+
datetime default,
616601
bint ignoretz=False,
617602
bint dayfirst=False,
618603
bint yearfirst=False,
@@ -623,13 +608,15 @@ cdef dateutil_parse(
623608
str attr
624609
datetime ret
625610
object res
626-
object reso = None
611+
str reso = None
627612
dict repl = {}
628613

629614
res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst)
630615

631616
if res is None:
632-
raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}")
617+
raise DateParseError(
618+
f"Unknown datetime string format, unable to parse: {timestr}"
619+
)
633620

634621
for attr in ["year", "month", "day", "hour",
635622
"minute", "second", "microsecond"]:
@@ -639,15 +626,27 @@ cdef dateutil_parse(
639626
reso = attr
640627

641628
if reso is None:
642-
raise ValueError(f"Unable to parse datetime string: {timestr}")
629+
raise DateParseError(f"Unable to parse datetime string: {timestr}")
643630

644631
if reso == "microsecond":
645632
if repl["microsecond"] == 0:
646633
reso = "second"
647634
elif repl["microsecond"] % 1000 == 0:
648635
reso = "millisecond"
649636

650-
ret = default.replace(**repl)
637+
try:
638+
ret = default.replace(**repl)
639+
except ValueError as err:
640+
# e.g. "day is out of range for month"
641+
# we re-raise to match dateutil's exception message
642+
raise DateParseError(str(err) + ": " + timestr) from err
643+
except OverflowError as err:
644+
# with e.g. "08335394550" dateutil raises when trying to pass
645+
# year=8335394550 to datetime.replace
646+
raise OutOfBoundsDatetime(
647+
f'Parsing "{timestr}" to datetime overflows'
648+
) from err
649+
651650
if res.weekday is not None and not res.day:
652651
ret = ret + relativedelta.relativedelta(weekday=res.weekday)
653652
if not ignoretz:

pandas/tests/frame/test_block_internals.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ def f(dtype):
259259
f("float64")
260260

261261
# 10822
262-
msg = "^Unknown string format: aa, at position 0$"
262+
msg = "^Unknown datetime string format, unable to parse: aa, at position 0$"
263263
with pytest.raises(ValueError, match=msg):
264264
f("M8[ns]")
265265

pandas/tests/indexes/datetimes/test_constructors.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1037,7 +1037,7 @@ def test_from_freq_recreate_from_data(self, freq):
10371037

10381038
def test_datetimeindex_constructor_misc(self):
10391039
arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
1040-
msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
1040+
msg = r"(\(')?Unknown datetime string format(:', 'Jn 3, 2005'\))?"
10411041
with pytest.raises(ValueError, match=msg):
10421042
DatetimeIndex(arr)
10431043

pandas/tests/indexes/datetimes/test_date_range.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -980,7 +980,7 @@ def test_misc(self):
980980
def test_date_parse_failure(self):
981981
badly_formed_date = "2007/100/1"
982982

983-
msg = "Unknown string format: 2007/100/1"
983+
msg = "Unknown datetime string format, unable to parse: 2007/100/1"
984984
with pytest.raises(ValueError, match=msg):
985985
Timestamp(badly_formed_date)
986986

pandas/tests/io/pytables/test_select.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -594,7 +594,7 @@ def test_frame_select(setup_path):
594594
# invalid terms
595595
df = tm.makeTimeDataFrame()
596596
store.append("df_time", df)
597-
msg = "could not convert string to Timestamp"
597+
msg = "day is out of range for month: 0"
598598
with pytest.raises(ValueError, match=msg):
599599
store.select("df_time", "index>0")
600600

pandas/tests/scalar/timestamp/test_constructors.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -439,7 +439,7 @@ def test_constructor_nanosecond(self, result):
439439
@pytest.mark.parametrize("z", ["Z0", "Z00"])
440440
def test_constructor_invalid_Z0_isostring(self, z):
441441
# GH 8910
442-
msg = f"Unknown string format: 2014-11-02 01:00{z}"
442+
msg = f"Unknown datetime string format, unable to parse: 2014-11-02 01:00{z}"
443443
with pytest.raises(ValueError, match=msg):
444444
Timestamp(f"2014-11-02 01:00{z}")
445445

pandas/tests/series/test_constructors.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def test_infer_with_date_and_datetime(self):
7575
def test_unparseable_strings_with_dt64_dtype(self):
7676
# pre-2.0 these would be silently ignored and come back with object dtype
7777
vals = ["aa"]
78-
msg = "^Unknown string format: aa, at position 0$"
78+
msg = "^Unknown datetime string format, unable to parse: aa, at position 0$"
7979
with pytest.raises(ValueError, match=msg):
8080
Series(vals, dtype="datetime64[ns]")
8181

pandas/tests/tools/test_to_datetime.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -2498,7 +2498,7 @@ def test_string_na_nat_conversion_malformed(self, cache):
24982498
malformed = np.array(["1/100/2000", np.nan], dtype=object)
24992499

25002500
# GH 10636, default is now 'raise'
2501-
msg = r"Unknown string format:|day is out of range for month"
2501+
msg = r"Unknown datetime string format"
25022502
with pytest.raises(ValueError, match=msg):
25032503
with tm.assert_produces_warning(
25042504
UserWarning, match="Could not infer format"
@@ -2812,7 +2812,7 @@ def test_day_not_in_month_coerce(self, cache, arg, format, warning):
28122812
assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
28132813

28142814
def test_day_not_in_month_raise(self, cache):
2815-
msg = "could not convert string to Timestamp"
2815+
msg = "day is out of range for month: 2015-02-29, at position 0"
28162816
with pytest.raises(ValueError, match=msg):
28172817
with tm.assert_produces_warning(
28182818
UserWarning, match="Could not infer format"
@@ -3239,9 +3239,10 @@ def test_invalid_origins_tzinfo(self):
32393239

32403240
def test_incorrect_value_exception(self):
32413241
# GH47495
3242-
with pytest.raises(
3243-
ValueError, match="Unknown string format: yesterday, at position 1"
3244-
):
3242+
msg = (
3243+
"Unknown datetime string format, unable to parse: yesterday, at position 1"
3244+
)
3245+
with pytest.raises(ValueError, match=msg):
32453246
with tm.assert_produces_warning(
32463247
UserWarning, match="Could not infer format"
32473248
):

pandas/tests/tseries/frequencies/test_inference.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -387,7 +387,7 @@ def test_invalid_index_types_unicode():
387387
# see gh-10822
388388
#
389389
# Odd error message on conversions to datetime for unicode.
390-
msg = "Unknown string format"
390+
msg = "Unknown datetime string format"
391391

392392
with pytest.raises(ValueError, match=msg):
393393
frequencies.infer_freq(tm.makeStringIndex(10))
@@ -422,7 +422,7 @@ def test_series_invalid_type(end):
422422

423423
def test_series_inconvertible_string():
424424
# see gh-6407
425-
msg = "Unknown string format"
425+
msg = "Unknown datetime string format"
426426

427427
with pytest.raises(ValueError, match=msg):
428428
frequencies.infer_freq(Series(["foo", "bar"]))

0 commit comments

Comments
 (0)