From 0213b8210e304cd1018fe62cab99b2cf0e61278e Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 28 Jul 2018 22:41:59 +0100 Subject: [PATCH 1/7] convert=False during exception handling in Block.replace --- pandas/core/internals/blocks.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ffa2267dd6877..751ac0b2e8c4a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -802,12 +802,15 @@ def replace(self, to_replace, value, inplace=False, filter=None, copy=not inplace) for b in blocks] return blocks except (TypeError, ValueError): - # try again with a compatible block block = self.astype(object) - return block.replace( - to_replace=original_to_replace, value=value, inplace=inplace, - filter=filter, regex=regex, convert=convert) + return block.replace(to_replace=original_to_replace, + value=value, + inplace=inplace, + filter=filter, + regex=regex, + # GH 20380 without convert + convert=False) def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ From 42fc85edfbbb9f3eaabd136b448e2149ce530d9e Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 28 Jul 2018 23:23:29 +0100 Subject: [PATCH 2/7] try convert datetime --- pandas/core/dtypes/cast.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ead7b39309f5e..16f853700f225 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -838,7 +838,12 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Soft conversions if datetime: - values = lib.maybe_convert_objects(values, convert_datetime=datetime) + # GH 20380 + try: + values = lib.maybe_convert_objects(values, + convert_datetime=datetime) + except ValueError: + pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert From 
02e3f2ff3864c0eb32bf4613c9d63b0eb1f87a4d Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 28 Jul 2018 23:23:55 +0100 Subject: [PATCH 3/7] refactor and add test case --- pandas/tests/frame/test_replace.py | 57 ++++++++++++------------------ 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index dd83a94b7062a..c215a14f5243a 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -757,40 +757,29 @@ def test_replace_for_new_dtypes(self): result = tsframe.fillna(method='bfill') assert_frame_equal(result, tsframe.fillna(method='bfill')) - def test_replace_dtypes(self): - # int - df = DataFrame({'ints': [1, 2, 3]}) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int32) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int32) - assert_frame_equal(result, expected) - - df = DataFrame({'ints': [1, 2, 3]}, dtype=np.int16) - result = df.replace(1, 0) - expected = DataFrame({'ints': [0, 2, 3]}, dtype=np.int16) - assert_frame_equal(result, expected) - - # bools - df = DataFrame({'bools': [True, False, True]}) - result = df.replace(False, True) - assert result.values.all() - - # complex blocks - df = DataFrame({'complex': [1j, 2j, 3j]}) - result = df.replace(1j, 0j) - expected = DataFrame({'complex': [0j, 2j, 3j]}) - assert_frame_equal(result, expected) - - # datetime blocks - prev = datetime.today() - now = datetime.today() - df = DataFrame({'datetime64': Index([prev, now, prev])}) - result = df.replace(prev, now) - expected = DataFrame({'datetime64': Index([now] * 3)}) + @pytest.mark.parametrize('frame, to_replace, value, expected', [ + (DataFrame({'ints': [1, 2, 3]}), 1, 0, + DataFrame({'ints': [0, 2, 3]})), + (DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)), + 
(DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0, + DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)), + (DataFrame({'bools': [True, False, True]}), False, True, + DataFrame({'bools': [True, True, True]})), + (DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0, + DataFrame({'complex': [0j, 2j, 3j]})), + (DataFrame({'datetime64': Index([datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28)])}), + datetime(2018, 5, 28), datetime(2018, 7, 28), + DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})), + # GH 20380 + (DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}), + 'foo', 'bar', + DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})) + ]) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, 'replace')(to_replace, value) assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self): From fcfd67599dd9334a3bae60b4f6143d7a6216e69c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 28 Jul 2018 23:24:21 +0100 Subject: [PATCH 4/7] remove False convert --- pandas/core/internals/blocks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 751ac0b2e8c4a..d50b3e8a83928 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -809,8 +809,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, inplace=inplace, filter=filter, regex=regex, - # GH 20380 without convert - convert=False) + convert=convert) def _replace_single(self, *args, **kwargs): """ no-op on a non-ObjectBlock """ From 8bdcd62ff7e3df1db9ab40f327082cc31663f182 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 29 Jul 2018 19:57:37 +0100 Subject: [PATCH 5/7] add dt64 with tzinfo --- pandas/tests/frame/test_replace.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 
c215a14f5243a..e4ba07049c298 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -776,7 +776,15 @@ def test_replace_for_new_dtypes(self): # GH 20380 (DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}), 'foo', 'bar', - DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})) + DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})), + (DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'), + 'B': [0, np.nan, 2]}), + Timestamp('20130102', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'), + Timestamp('20130104', tz='US/Eastern'), + Timestamp('20130103', tz='US/Eastern')], + 'B': [0, np.nan, 2]})) ]) def test_replace_dtypes(self, frame, to_replace, value, expected): result = getattr(frame, 'replace')(to_replace, value) From e3aba78263dc5ab813c7c9cebf2c48b71888791e Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 29 Jul 2018 20:38:44 +0100 Subject: [PATCH 6/7] stricter exception handling and clearer comments --- pandas/core/dtypes/cast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 16f853700f225..72b43f5d51901 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ import warnings from pandas._libs import tslib, lib, tslibs -from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs import iNaT, OutOfBoundsDatetime from pandas.compat import string_types, text_type, PY3 from .common import (ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -838,11 +838,12 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # Soft conversions if datetime: - # GH 20380 + # GH 20380: a datetime beyond year 2262 is outside the bounds + # of nanosecond-resolution 64-bit integers, so leave it as object. 
try: values = lib.maybe_convert_objects(values, convert_datetime=datetime) - except ValueError: + except OutOfBoundsDatetime: pass if timedelta and is_object_dtype(values.dtype): From cb2a3c69613a9774748650b499da10e2cf3998d5 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 29 Jul 2018 21:16:35 +0100 Subject: [PATCH 7/7] add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 42e286f487a7d..95767b4e03822 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -558,7 +558,7 @@ Reshaping - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) - Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) -- +- Bug in :meth:`DataFrame.replace` which would raise ``RecursionError`` when converting out-of-bounds ``datetime64[ns, tz]`` values (:issue:`20380`) - Build Changes