Skip to content

BUG: Series.replace with value=None explicitly #45081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,8 @@ ExtensionArray
- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
- Fixed bug in :meth:`Series.replace` where explicitly passing ``value=None`` is treated as if no ``value`` was passed, and ``None`` not being in the result (:issue:`36984`, :issue:`19998`)
- Fixed bug in :meth:`Series.replace` with unwanted downcasting being done in no-op replacements (:issue:`44498`)
- Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`, :issue:`40732`, :issue:`31644`, :issue:`41215`, :issue:`25438`)
-

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5265,11 +5265,11 @@ def pop(self, item: Hashable) -> Series:
def replace(
self,
to_replace=None,
value=None,
value=lib.no_default,
inplace: bool = False,
limit=None,
regex: bool = False,
method: str = "pad",
method: str | lib.NoDefault = lib.no_default,
):
return super().replace(
to_replace=to_replace,
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6515,11 +6515,11 @@ def bfill(
def replace(
self,
to_replace=None,
value=None,
value=lib.no_default,
inplace: bool_t = False,
limit: int | None = None,
regex=False,
method="pad",
method=lib.no_default,
):
if not (
is_scalar(to_replace)
Expand All @@ -6538,7 +6538,15 @@ def replace(

self._consolidate_inplace()

if value is None:
if value is lib.no_default or method is not lib.no_default:
# GH#36984 if the user explicitly passes value=None we want to
# respect that. We have the corner case where the user explicitly
# passes value=None *and* a method, which we interpret as meaning
# they want the (documented) default behavior.
if method is lib.no_default:
# TODO: get this to show up as the default in the docs?
method = "pad"

# passing a single value that is scalar like
# when value is None (GH5319), for compat
if not is_dict_like(to_replace) and not is_dict_like(regex):
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,12 @@ def replace(
elif self._can_hold_element(value):
blk = self if inplace else self.copy()
putmask_inplace(blk.values, mask, value)
blocks = blk.convert(numeric=False, copy=False)
if not (self.is_object and value is None):
# if the user *explicitly* gave None, we keep None, otherwise
# may downcast to NaN
blocks = blk.convert(numeric=False, copy=False)
else:
blocks = [blk]
return blocks

elif self.ndim == 1 or self.shape[0] == 1:
Expand Down Expand Up @@ -802,7 +807,8 @@ def replace_list(
inplace=inplace,
regex=regex,
)
if convert and blk.is_object:
if convert and blk.is_object and not all(x is None for x in dest_list):
# GH#44498 avoid unwanted cast-back
result = extend_blocks(
[b.convert(numeric=False, copy=True) for b in result]
)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4924,11 +4924,11 @@ def pop(self, item: Hashable) -> Any:
def replace(
self,
to_replace=None,
value=None,
value=lib.no_default,
inplace=False,
limit=None,
regex=False,
method="pad",
method: str | lib.NoDefault = lib.no_default,
):
return super().replace(
to_replace=to_replace,
Expand Down
22 changes: 17 additions & 5 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,18 +693,30 @@
4 None
dtype: object
When ``value=None`` and `to_replace` is a scalar, list or
tuple, `replace` uses the method parameter (default 'pad') to do the
When ``value`` is not explicitly passed and `to_replace` is a scalar, list
or tuple, `replace` uses the method parameter (default 'pad') to do the
replacement. So this is why the 'a' values are being replaced by 10
in rows 1 and 2 and 'b' in row 4 in this case.
The command ``s.replace('a', None)`` is actually equivalent to
``s.replace(to_replace='a', value=None, method='pad')``:
>>> s.replace('a', None)
>>> s.replace('a')
0 10
1 10
2 10
3 b
4 b
dtype: object
On the other hand, if ``None`` is explicitly passed for ``value``, it will
be respected:
>>> s.replace('a', None)
0 10
1 None
2 None
3 b
4 None
dtype: object
.. versionchanged:: 1.4.0
Previously the explicit ``None`` was silently ignored.
"""
39 changes: 39 additions & 0 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,45 @@


class TestSeriesReplace:
def test_replace_explicit_none(self):
# GH#36984 if the user explicitly passes value=None, give it to them
ser = pd.Series([0, 0, ""], dtype=object)
result = ser.replace("", None)
expected = pd.Series([0, 0, None], dtype=object)
tm.assert_series_equal(result, expected)

df = pd.DataFrame(np.zeros((3, 3)))
df.iloc[2, 2] = ""
result = df.replace("", None)
expected = pd.DataFrame(
{
0: np.zeros(3),
1: np.zeros(3),
2: np.array([0.0, 0.0, None], dtype=object),
}
)
assert expected.iloc[2, 2] is None
tm.assert_frame_equal(result, expected)

# GH#19998 same thing with object dtype
ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
result = ser.replace("a", None)
expected = pd.Series([10, 20, 30, None, None, "b", None])
assert expected.iloc[-1] is None
tm.assert_series_equal(result, expected)

def test_replace_noop_doesnt_downcast(self):
# GH#44498
ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
res = ser.replace({np.nan: None}) # should be a no-op
tm.assert_series_equal(res, ser)
assert res.dtype == object

# same thing but different calling convention
res = ser.replace(np.nan, None)
tm.assert_series_equal(res, ser)
assert res.dtype == object

def test_replace(self):
N = 100
ser = pd.Series(np.random.randn(N))
Expand Down