Skip to content

Commit 60a2d56

Browse files
BUG: fixed OutOfBoundsDatetime exception when errors=coerce #45319 (#47794)
* BUG: fixed OutOfBoundsDatetime exception when errors=coerce #45319
* BUG: Added test and release note #45319
* BUG: Restructured test parameters #45319
* BUG: Restructured test #45319
* BUG: Restructured parameters for test #45319
* BUG: Renamed test and added raise and ignore cases #45319
* BUG: Changed exception case #45319

Co-authored-by: Steven Rotondo <[email protected]>
1 parent d0bd469 commit 60a2d56

File tree

3 files changed

+37
-1
lines changed

3 files changed

+37
-1
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -900,6 +900,7 @@ Datetimelike
900900
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
901901
- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
902902
- Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
903+
- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be raised even with ``errors="coerce"`` when the input had more than 50 rows (:issue:`45319`)
903904
- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`)
904905
-
905906

pandas/core/tools/datetimes.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,11 @@ def _maybe_cache(
228228
unique_dates = unique(arg)
229229
if len(unique_dates) < len(arg):
230230
cache_dates = convert_listlike(unique_dates, format)
231-
cache_array = Series(cache_dates, index=unique_dates)
231+
# GH#45319
232+
try:
233+
cache_array = Series(cache_dates, index=unique_dates)
234+
except OutOfBoundsDatetime:
235+
return cache_array
232236
# GH#39882 and GH#35888 in case of None and NaT we get duplicates
233237
if not cache_array.index.is_unique:
234238
cache_array = cache_array[~cache_array.index.duplicated()]

pandas/tests/tools/test_to_datetime.py

+31
Original file line numberDiff line numberDiff line change
@@ -2777,3 +2777,34 @@ def test_to_datetime_monotonic_increasing_index(cache):
27772777
result = to_datetime(times.iloc[:, 0], cache=cache)
27782778
expected = times.iloc[:, 0]
27792779
tm.assert_series_equal(result, expected)
2780+
2781+
2782+
@pytest.mark.parametrize(
2783+
"series_length",
2784+
[40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)],
2785+
)
2786+
def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
2787+
# GH#45319
2788+
s = Series(
2789+
[datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
2790+
+ ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
2791+
)
2792+
result1 = to_datetime(s, errors="coerce", utc=True)
2793+
2794+
expected1 = Series(
2795+
[NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length)
2796+
)
2797+
2798+
tm.assert_series_equal(result1, expected1)
2799+
2800+
result2 = to_datetime(s, errors="ignore", utc=True)
2801+
2802+
expected2 = Series(
2803+
[datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
2804+
+ ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
2805+
)
2806+
2807+
tm.assert_series_equal(result2, expected2)
2808+
2809+
with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"):
2810+
to_datetime(s, errors="raise", utc=True)

0 commit comments

Comments
 (0)