Skip to content

Commit 57c7daa

Browse files
minggli authored and jreback committed
BUG: DataFrame.replace with out of bound datetime causing RecursionError (#22108)
1 parent b62c324 commit 57c7daa

File tree

4 files changed

+46
-41
lines changed

4 files changed

+46
-41
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ Reshaping
652652
- Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`)
653653
- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`)
654654
- Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`)
655-
-
655+
- Bug in :meth:`DataFrame.replace` which raised a ``RecursionError`` when converting out-of-bounds ``datetime64[ns, tz]`` values (:issue:`20380`)
656656
-
657657

658658
Build Changes

pandas/core/dtypes/cast.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import warnings
77

88
from pandas._libs import tslib, lib, tslibs
9-
from pandas._libs.tslibs import iNaT
9+
from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime
1010
from pandas.compat import string_types, text_type, PY3
1111
from .common import (ensure_object, is_bool, is_integer, is_float,
1212
is_complex, is_datetimetz, is_categorical_dtype,
@@ -838,7 +838,13 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
838838

839839
# Soft conversions
840840
if datetime:
841-
values = lib.maybe_convert_objects(values, convert_datetime=datetime)
841+
# GH 20380: a datetime beyond year 2262 falls outside the bounds of
842+
# nanosecond-resolution 64-bit integers; leave such values unconverted.
843+
try:
844+
values = lib.maybe_convert_objects(values,
845+
convert_datetime=datetime)
846+
except OutOfBoundsDatetime:
847+
pass
842848

843849
if timedelta and is_object_dtype(values.dtype):
844850
# Object check to ensure only run if previous did not convert

pandas/core/internals/blocks.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -802,12 +802,14 @@ def replace(self, to_replace, value, inplace=False, filter=None,
802802
copy=not inplace) for b in blocks]
803803
return blocks
804804
except (TypeError, ValueError):
805-
806805
# try again with a compatible block
807806
block = self.astype(object)
808-
return block.replace(
809-
to_replace=original_to_replace, value=value, inplace=inplace,
810-
filter=filter, regex=regex, convert=convert)
807+
return block.replace(to_replace=original_to_replace,
808+
value=value,
809+
inplace=inplace,
810+
filter=filter,
811+
regex=regex,
812+
convert=convert)
811813

812814
def _replace_single(self, *args, **kwargs):
813815
""" no-op on a non-ObjectBlock """

pandas/tests/frame/test_replace.py

+31-34
Original file line numberDiff line numberDiff line change
@@ -755,40 +755,37 @@ def test_replace_for_new_dtypes(self):
755755
result = tsframe.fillna(method='bfill')
756756
assert_frame_equal(result, tsframe.fillna(method='bfill'))
757757

758-
def test_replace_dtypes(self):
759-
# int
760-
df = DataFrame({'ints': [1, 2, 3]})
761-
result = df.replace(1, 0)
762-
expected = DataFrame({'ints': [0, 2, 3]})
763-
assert_frame_equal(result, expected)
764-
765-
df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32)
766-
result = df.replace(1, 0)
767-
expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)
768-
assert_frame_equal(result, expected)
769-
770-
df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16)
771-
result = df.replace(1, 0)
772-
expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)
773-
assert_frame_equal(result, expected)
774-
775-
# bools
776-
df = DataFrame({'bools': [True, False, True]})
777-
result = df.replace(False, True)
778-
assert result.values.all()
779-
780-
# complex blocks
781-
df = DataFrame({'complex': [1j, 2j, 3j]})
782-
result = df.replace(1j, 0j)
783-
expected = DataFrame({'complex': [0j, 2j, 3j]})
784-
assert_frame_equal(result, expected)
785-
786-
# datetime blocks
787-
prev = datetime.today()
788-
now = datetime.today()
789-
df = DataFrame({'datetime64': Index([prev, now, prev])})
790-
result = df.replace(prev, now)
791-
expected = DataFrame({'datetime64': Index([now] * 3)})
758+
@pytest.mark.parametrize('frame, to_replace, value, expected', [
759+
(DataFrame({'ints': [1, 2, 3]}), 1, 0,
760+
DataFrame({'ints': [0, 2, 3]})),
761+
(DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0,
762+
DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)),
763+
(DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0,
764+
DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)),
765+
(DataFrame({'bools': [True, False, True]}), False, True,
766+
DataFrame({'bools': [True, True, True]})),
767+
(DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0,
768+
DataFrame({'complex': [0j, 2j, 3j]})),
769+
(DataFrame({'datetime64': Index([datetime(2018, 5, 28),
770+
datetime(2018, 7, 28),
771+
datetime(2018, 5, 28)])}),
772+
datetime(2018, 5, 28), datetime(2018, 7, 28),
773+
DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})),
774+
# GH 20380
775+
(DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}),
776+
'foo', 'bar',
777+
DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})),
778+
(DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'),
779+
'B': [0, np.nan, 2]}),
780+
Timestamp('20130102', tz='US/Eastern'),
781+
Timestamp('20130104', tz='US/Eastern'),
782+
DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
783+
Timestamp('20130104', tz='US/Eastern'),
784+
Timestamp('20130103', tz='US/Eastern')],
785+
'B': [0, np.nan, 2]}))
786+
])
787+
def test_replace_dtypes(self, frame, to_replace, value, expected):
788+
result = getattr(frame, 'replace')(to_replace, value)
792789
assert_frame_equal(result, expected)
793790

794791
def test_replace_input_formats_listlike(self):

0 commit comments

Comments
 (0)