Skip to content

Commit 84522a0

Browse files
jbrockmendel authored and
jreback committed
Fix parsing corner case closes pandas-dev#19382 (pandas-dev#19529)
1 parent a22acc2 commit 84522a0

File tree

5 files changed

+56
-7
lines changed

5 files changed

+56
-7
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ Datetimelike
550550
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
551551
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
552552
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
553+
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
553554
-
554555

555556
Timezones

pandas/_libs/tslib.pyx

+24 -6
Original file line numberDiff line numberDiff line change
@@ -609,20 +609,38 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
609609
value = tz_convert_single(value, tz, 'UTC')
610610
iresult[i] = value
611611
check_dts_bounds(&dts)
612+
except OutOfBoundsDatetime:
613+
# GH#19382 for just-barely-OutOfBounds falling back to
614+
# dateutil parser will return incorrect result because
615+
# it will ignore nanoseconds
616+
if require_iso8601:
617+
if _parse_today_now(val, &iresult[i]):
618+
continue
619+
elif is_coerce:
620+
iresult[i] = NPY_NAT
621+
continue
622+
elif is_raise:
623+
raise ValueError("time data {val} doesn't match "
624+
"format specified"
625+
.format(val=val))
626+
return values
627+
elif is_coerce:
628+
iresult[i] = NPY_NAT
629+
continue
630+
raise
612631
except ValueError:
613632
# if requiring iso8601 strings, skip trying other formats
614633
if require_iso8601:
615634
if _parse_today_now(val, &iresult[i]):
616635
continue
617-
if is_coerce:
636+
elif is_coerce:
618637
iresult[i] = NPY_NAT
619638
continue
620639
elif is_raise:
621-
raise ValueError(
622-
"time data %r doesn't match format "
623-
"specified" % (val,))
624-
else:
625-
return values
640+
raise ValueError("time data {val} doesn't match "
641+
"format specified"
642+
.format(val=val))
643+
return values
626644

627645
try:
628646
py_dt = parse_datetime_string(val, dayfirst=dayfirst,

pandas/_libs/tslibs/conversion.pyx

+8
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
2626
dt64_to_dtstruct, dtstruct_to_dt64,
2727
get_datetime64_unit, get_datetime64_value,
2828
pydatetime_to_dt64)
29+
from np_datetime import OutOfBoundsDatetime
2930

3031
from util cimport (is_string_object,
3132
is_datetime64_object,
@@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
472473
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
473474
ambiguous='raise',
474475
errors='raise')[0]
476+
477+
except OutOfBoundsDatetime:
478+
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
479+
# parser will return incorrect result because it will ignore
480+
# nanoseconds
481+
raise
482+
475483
except ValueError:
476484
try:
477485
ts = parse_datetime_string(ts, dayfirst=dayfirst,

pandas/tests/indexes/datetimes/test_tools.py

+15 -1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas._libs.tslibs import parsing
1818
from pandas.core.tools import datetimes as tools
1919

20+
from pandas.errors import OutOfBoundsDatetime
2021
from pandas.compat import lmap
2122
from pandas.compat.numpy import np_array_datetime64_compat
2223
from pandas.core.dtypes.common import is_datetime64_ns_dtype
@@ -783,7 +784,6 @@ def test_dataframe_dtypes(self, cache):
783784

784785

785786
class TestToDatetimeMisc(object):
786-
787787
@pytest.mark.parametrize('cache', [True, False])
788788
def test_to_datetime_iso8601(self, cache):
789789
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
@@ -1596,6 +1596,20 @@ def test_coerce_of_invalid_datetimes(self):
15961596
)
15971597
)
15981598

1599+
def test_to_datetime_barely_out_of_bounds(self):
1600+
# GH#19529
1601+
# GH#19382 close enough to bounds that dropping nanos would result
1602+
# in an in-bounds datetime
1603+
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
1604+
1605+
with pytest.raises(OutOfBoundsDatetime):
1606+
to_datetime(arr)
1607+
1608+
with pytest.raises(OutOfBoundsDatetime):
1609+
# Essentially the same as above, but more directly calling
1610+
# the relevant function
1611+
tslib.array_to_datetime(arr)
1612+
15991613

16001614
def test_normalize_date():
16011615
value = date(2012, 9, 7)

pandas/tests/scalar/test_timestamp.py

+8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas._libs.tslibs import conversion
1919
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz
2020

21+
from pandas.errors import OutOfBoundsDatetime
2122
from pandas.compat import long, PY3
2223
from pandas.compat.numpy import np_datetime64_compat
2324
from pandas import Timestamp, Period, Timedelta
@@ -410,6 +411,13 @@ def test_out_of_bounds_string(self):
410411
with pytest.raises(ValueError):
411412
Timestamp('2263-01-01')
412413

414+
def test_barely_out_of_bounds(self):
415+
# GH#19529
416+
# GH#19382 close enough to bounds that dropping nanos would result
417+
# in an in-bounds datetime
418+
with pytest.raises(OutOfBoundsDatetime):
419+
Timestamp('2262-04-11 23:47:16.854775808')
420+
413421
def test_bounds_with_different_units(self):
414422
out_of_bounds_dates = ('1677-09-21', '2262-04-12')
415423

0 commit comments

Comments
 (0)