From 91997917dcb7982a35ad918b1b442656ed0821ae Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 11 May 2020 15:24:47 +0100 Subject: [PATCH 1/2] Backport PR #34048 on branch 1.0.x (Bug in DataFrame.replace casts columns to ``object`` dtype if items in ``to_replace`` not in values) --- doc/source/whatsnew/v1.0.4.rst | 1 + pandas/core/internals/blocks.py | 5 ----- pandas/tests/frame/methods/test_replace.py | 19 +++++++++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index 4f122b0dea5f4..5177d8aa42054 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -23,6 +23,7 @@ Fixed regressions - Bug where an ordered :class:`Categorical` containing only ``NaN`` values would raise rather than returning ``NaN`` when taking the minimum or maximum (:issue:`33450`) - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) - Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 `_, :issue:`32905`). +- Bug in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - .. _whatsnew_104.bug_fixes: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 33a5d6443aa7c..5172b4a7097f3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -765,11 +765,6 @@ def replace( filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False - if not mask.any(): - if inplace: - return [self] - return [self.copy()] - try: blocks = self.putmask(mask, value, inplace=inplace) # Note: it is _not_ the case that self._can_hold_element(value) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 92b74c4409d7d..e3fe08d848c20 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1363,3 +1363,22 @@ def test_replace_after_convert_dtypes(self): result = df.replace(1, 10) expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) + + def test_replace_invalid_to_replace(self): + # GH 18634 + # API: replace() should raise an exception if invalid argument is given + df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) + msg = ( + r"Expecting 'to_replace' to be either a scalar, array-like, " + r"dict or None, got invalid type.*" + ) + with pytest.raises(TypeError, match=msg): + df.replace(lambda x: x.strip()) + + @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) + @pytest.mark.parametrize("value", [np.nan, pd.NA]) + def test_replace_no_replacement_dtypes(self, dtype, value): + # https://github.com/pandas-dev/pandas/issues/32988 + df = pd.DataFrame(np.eye(2), dtype=dtype) + result = df.replace(to_replace=[None, -np.inf, np.inf], value=value) + tm.assert_frame_equal(result, df) From 995d3e403f68504a31b301ed9244717df35153a6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 11 May 2020 15:29:29 +0100 Subject: [PATCH 2/2] fixup cherry-pick --- pandas/tests/frame/methods/test_replace.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index e3fe08d848c20..97ef36bb8b599 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1364,17 +1364,6 @@ def test_replace_after_convert_dtypes(self): expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") tm.assert_frame_equal(result, expected) - def test_replace_invalid_to_replace(self): - # GH 18634 - # API: replace() should raise an exception if invalid argument is given - df = pd.DataFrame({"one": ["a", "b ", "c"], "two": ["d ", "e ", "f "]}) - msg = ( - r"Expecting 'to_replace' to be either a scalar, array-like, " - r"dict or None, got invalid type.*" - ) - with pytest.raises(TypeError, match=msg): - df.replace(lambda x: x.strip()) - @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) @pytest.mark.parametrize("value", [np.nan, pd.NA]) def test_replace_no_replacement_dtypes(self, dtype, value):