From c2d99ba2ec19af902ab7e27c01a5c318dd39c90a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 4 Apr 2022 19:20:03 +0000 Subject: [PATCH 1/2] REGR: Replace changes the dtype of other columns --- doc/source/whatsnew/v1.4.3.rst | 2 +- pandas/core/internals/blocks.py | 14 ++++++++------ pandas/tests/frame/methods/test_replace.py | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index d53acc698c3bb..f8de590fce686 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.replace` where passing a dictionary to ``to_replace`` with an explicit ``None`` replacement value also cast other columns to object dtype, even when they contained no values to replace (:issue:`46634`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 26569b571724d..2b4b9bc247457 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -781,12 +781,14 @@ def _replace_coerce( ) else: if value is None: - # gh-45601, gh-45836 - nb = self.astype(np.dtype(object), copy=False) - if nb is self and not inplace: - nb = nb.copy() - putmask_inplace(nb.values, mask, value) - return [nb] + # gh-45601, gh-45836, gh-46634 + if mask.any(): + nb = self.astype(np.dtype(object), copy=False) + if nb is self and not inplace: + nb = nb.copy() + putmask_inplace(nb.values, mask, value) + return [nb] + return [self] return self.replace( to_replace=to_replace, value=value, inplace=inplace, mask=mask ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 870a64cfa59c9..f7504e9173bf5 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ 
b/pandas/tests/frame/methods/test_replace.py @@ -675,6 +675,25 @@ def test_replace_NAT_with_None(self): expected = DataFrame([None, None]) tm.assert_frame_equal(result, expected) + def test_replace_with_None_keeps_categorical(self): + # gh-46634 + cat_series = Series(["b", "b", "b", "d"], dtype="category") + df = DataFrame( + { + "id": Series([5, 4, 3, 2], dtype="float64"), + "col": cat_series, + } + ) + result = df.replace({3: None}) + + expected = DataFrame( + { + "id": Series([5.0, 4.0, None, 2.0], dtype="object"), + "col": cat_series, + } + ) + tm.assert_frame_equal(result, expected) + def test_replace_value_is_none(self, datetime_frame): orig_value = datetime_frame.iloc[0, 0] orig2 = datetime_frame.iloc[1, 0] From 4aaa243f96654f08e20566ae21a41e005b943a58 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 12 Apr 2022 11:19:54 +0000 Subject: [PATCH 2/2] copy if not inplace --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2b4b9bc247457..9aa61348661c6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -788,7 +788,7 @@ def _replace_coerce( nb = nb.copy() putmask_inplace(nb.values, mask, value) return [nb] - return [self] + return [self] if inplace else [self.copy()] return self.replace( to_replace=to_replace, value=value, inplace=inplace, mask=mask )