Skip to content

Commit c275dbf

Browse files
BUG: catch out-of-bounds datetime64 in Series/DataFrame constructor (#26848)
1 parent b9b081d commit c275dbf

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ Datetimelike
600600
- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`)
601601
- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'``
602602
- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`)
603+
- Bug in constructing a ``Series`` or ``DataFrame`` from a numpy ``datetime64`` array with a non-ns unit and out-of-bounds timestamps, which silently produced incorrect data; an ``OutOfBoundsDatetime`` error is now raised instead (:issue:`26206`).
603604
- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`)
604605

605606
Timedelta

pandas/core/dtypes/cast.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
10381038
.tz_convert(dtype.tz))
10391039
elif is_timedelta64:
10401040
value = to_timedelta(value, errors=errors)._values
1041+
except OutOfBoundsDatetime:
1042+
raise
10411043
except (AttributeError, ValueError, TypeError):
10421044
pass
10431045

@@ -1063,7 +1065,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'):
10631065
dtype = value.dtype
10641066

10651067
if dtype.kind == 'M' and dtype != _NS_DTYPE:
1066-
value = value.astype(_NS_DTYPE)
1068+
value = tslibs.conversion.ensure_datetime64ns(value)
10671069

10681070
elif dtype.kind == 'm' and dtype != _TD_DTYPE:
10691071
value = to_timedelta(value)

pandas/core/internals/construction.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy.ma as ma
99

1010
from pandas._libs import lib
11-
from pandas._libs.tslibs import IncompatibleFrequency
11+
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
1212
from pandas.compat import raise_with_traceback
1313

1414
from pandas.core.dtypes.cast import (
@@ -700,6 +700,9 @@ def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):
700700
elif not is_extension_type(subarr):
701701
subarr = construct_1d_ndarray_preserving_na(subarr, dtype,
702702
copy=copy)
703+
except OutOfBoundsDatetime:
704+
# an out-of-bounds datetime64 must always raise, so re-raise it before the generic handler below
705+
raise
703706
except (ValueError, TypeError):
704707
if is_categorical_dtype(dtype):
705708
# We *do* allow casting to categorical, since we know

pandas/tests/test_base.py

+38
Original file line numberDiff line numberDiff line change
@@ -1341,3 +1341,41 @@ def test_to_numpy_dtype(as_series):
13411341
expected = np.array(['2000-01-01T05', '2001-01-01T05'],
13421342
dtype='M8[ns]')
13431343
tm.assert_numpy_array_equal(result, expected)
1344+
1345+
1346+
class TestConstruction:
1347+
# test certain constructor behaviours on dtype inference across Series,
1348+
# Index and DataFrame
1349+
1350+
@pytest.mark.parametrize("klass", [
1351+
Series,
1352+
lambda x, **kwargs: DataFrame({'a': x}, **kwargs)['a'],
1353+
pytest.param(lambda x, **kwargs: DataFrame(x, **kwargs)[0],
1354+
marks=pytest.mark.xfail),
1355+
Index,
1356+
])
1357+
@pytest.mark.parametrize("a", [
1358+
np.array(['2263-01-01'], dtype='datetime64[D]'),
1359+
np.array([datetime(2263, 1, 1)], dtype=object),
1360+
np.array([np.datetime64('2263-01-01', 'D')], dtype=object),
1361+
np.array(["2263-01-01"], dtype=object)
1362+
], ids=['datetime64[D]', 'object-datetime.datetime',
1363+
'object-numpy-scalar', 'object-string'])
1364+
def test_constructor_datetime_outofbound(self, a, klass):
1365+
# GH-26853 (+ bug GH-26206 out of bound non-ns unit)
1366+
1367+
# No dtype specified (dtype inference)
1368+
# datetime64[non-ns] input raises an error; the other cases result in object dtype
1369+
# and preserve original data
1370+
if a.dtype.kind == 'M':
1371+
with pytest.raises(pd.errors.OutOfBoundsDatetime):
1372+
klass(a)
1373+
else:
1374+
result = klass(a)
1375+
assert result.dtype == 'object'
1376+
tm.assert_numpy_array_equal(result.to_numpy(), a)
1377+
1378+
# Explicit dtype specified
1379+
# Forced conversion to datetime64[ns] fails for every input -> all cases raise an error
1380+
with pytest.raises(pd.errors.OutOfBoundsDatetime):
1381+
klass(a, dtype='datetime64[ns]')

0 commit comments

Comments
 (0)