From 45d8605666e5f8efda971f3ef01ebcf5de54d0a9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Dec 2021 12:10:28 -0800 Subject: [PATCH 1/2] BUG: Series.replace with value=None explicitly --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/frame.py | 4 +-- pandas/core/generic.py | 14 ++++++-- pandas/core/internals/blocks.py | 10 ++++-- pandas/core/series.py | 4 +-- pandas/tests/series/methods/test_replace.py | 39 +++++++++++++++++++++ 6 files changed, 64 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7218c77e43409..56db2725a1cac 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -864,6 +864,8 @@ ExtensionArray - Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`) - Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`) - Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`) +- Fixed bug in :meth:`Series.replace` where explicitly passing ``value=None`` is treated as if no ``value`` was passed, and ``None`` not being in the result (:issue:`36984`, :issue:`19998`) +- Fixed bug in :meth:`Series.replace` with unwanted downcasting being done in no-op replacements (:issue:`44498`) - Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`, :issue:`40732`, :issue:`31644`, :issue:`41215`, :issue:`25438`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 794fb2afc7f9e..a6f7d17529d3e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5266,11 +5266,11 @@ def pop(self, item: Hashable) -> Series: def replace( self, to_replace=None, - value=None, + value=lib.no_default, inplace: bool = False, limit=None, regex: bool = False, - method: str = "pad", + method: str | lib.NoDefault = lib.no_default, ): return super().replace( to_replace=to_replace, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc15c846b1907..b5ca1e34b716a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6510,11 +6510,11 @@ def bfill( def replace( self, to_replace=None, - value=None, + value=lib.no_default, inplace: bool_t = False, limit: int | None = None, regex=False, - method="pad", + method=lib.no_default, ): if not ( is_scalar(to_replace) @@ -6533,7 +6533,15 @@ def replace( self._consolidate_inplace() - if value is None: + if value is lib.no_default or method is not lib.no_default: + # GH#36984 if the user explicitly passes value=None we want to + # respect that. We have the corner case where the user explicitly + # passes value=None *and* a method, which we interpret as meaning + # they want the (documented) default behavior. + if method is lib.no_default: + # TODO: get this to show up as the default in the docs? + method = "pad" + # passing a single value that is scalar like # when value is None (GH5319), for compat if not is_dict_like(to_replace) and not is_dict_like(regex): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3f50cb2c601b5..0db981ab017da 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -679,7 +679,12 @@ def replace( elif self._can_hold_element(value): blk = self if inplace else self.copy() putmask_inplace(blk.values, mask, value) - blocks = blk.convert(numeric=False, copy=False) + if not (self.is_object and value is None): + # if the user *explicitly* gave None, we keep None, otherwise + # may downcast to NaN + blocks = blk.convert(numeric=False, copy=False) + else: + blocks = [blk] return blocks elif self.ndim == 1 or self.shape[0] == 1: @@ -802,7 +807,8 @@ def replace_list( inplace=inplace, regex=regex, ) - if convert and blk.is_object: + if convert and blk.is_object and not all(x is None for x in dest_list): + # GH#44498 avoid unwanted cast-back result = extend_blocks( [b.convert(numeric=False, copy=True) for b in result] ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 746512e8fb7d6..4a06005988c08 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4902,11 +4902,11 @@ def pop(self, item: Hashable) -> Any: def replace( self, to_replace=None, - value=None, + value=lib.no_default, inplace=False, limit=None, regex=False, - method="pad", + method: str | lib.NoDefault = lib.no_default, ): return super().replace( to_replace=to_replace, diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index c0234ee2649b5..6a8dacfda5e78 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -9,6 +9,45 @@ class TestSeriesReplace: + def test_replace_explicit_none(self): + # GH#36984 if the user explicitly passes value=None, give it to them + ser = pd.Series([0, 0, ""], dtype=object) + result = ser.replace("", None) + expected = pd.Series([0, 0, None], dtype=object) + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(np.zeros((3, 3))) + df.iloc[2, 2] = "" + result = df.replace("", None) + expected = pd.DataFrame( + { + 0: np.zeros(3), + 1: np.zeros(3), + 2: np.array([0.0, 0.0, None], dtype=object), + } + ) + assert expected.iloc[2, 2] is None + tm.assert_frame_equal(result, expected) + + # GH#19998 same thing with object dtype + ser = pd.Series([10, 20, 30, "a", "a", "b", "a"]) + result = ser.replace("a", None) + expected = pd.Series([10, 20, 30, None, None, "b", None]) + assert expected.iloc[-1] is None + tm.assert_series_equal(result, expected) + + def test_replace_noop_doesnt_downcast(self): + # GH#44498 + ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) + res = ser.replace({np.nan: None}) # should be a no-op + tm.assert_series_equal(res, ser) + assert res.dtype == object + + # same thing but different calling convention + res = ser.replace(np.nan, None) + tm.assert_series_equal(res, ser) + assert res.dtype == object + def test_replace(self): N = 100 ser = pd.Series(np.random.randn(N)) From e58b204274b3ce094ce90d7be600b1f860ee9514 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Dec 2021 17:57:07 -0800 Subject: [PATCH 2/2] update docstring --- pandas/core/shared_docs.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index d853f728f64a3..f79fd3ed09f8d 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -693,18 +693,30 @@ 4 None dtype: object - When ``value=None`` and `to_replace` is a scalar, list or - tuple, `replace` uses the method parameter (default 'pad') to do the + When ``value`` is not explicitly passed and `to_replace` is a scalar, list + or tuple, `replace` uses the method parameter (default 'pad') to do the replacement. So this is why the 'a' values are being replaced by 10 in rows 1 and 2 and 'b' in row 4 in this case. - The command ``s.replace('a', None)`` is actually equivalent to - ``s.replace(to_replace='a', value=None, method='pad')``: - >>> s.replace('a', None) + >>> s.replace('a') 0 10 1 10 2 10 3 b 4 b dtype: object + + On the other hand, if ``None`` is explicitly passed for ``value``, it will + be respected: + + >>> s.replace('a', None) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + .. versionchanged:: 1.4.0 + Previously the explicit ``None`` was silently ignored. """