Skip to content

Commit af76bd5

Browse files
authored
BUG: Series.replace with value=None explicitly (#45081)
1 parent d3f665d commit af76bd5

File tree

7 files changed

+81
-14
lines changed

7 files changed

+81
-14
lines changed

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,8 @@ ExtensionArray
940940
- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
941941
- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
942942
- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
943+
- Fixed bug in :meth:`Series.replace` where explicitly passing ``value=None`` was treated as if no ``value`` had been passed, so ``None`` did not appear in the result (:issue:`36984`, :issue:`19998`)
944+
- Fixed bug in :meth:`Series.replace` where no-op replacements performed unwanted downcasting (:issue:`44498`)
943945
- Fixed bug in :meth:`Series.replace` with ``FloatDtype``, ``string[python]``, or ``string[pyarrow]`` dtype not being preserved when possible (:issue:`33484`, :issue:`40732`, :issue:`31644`, :issue:`41215`, :issue:`25438`)
944946
-
945947

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5265,11 +5265,11 @@ def pop(self, item: Hashable) -> Series:
52655265
def replace(
52665266
self,
52675267
to_replace=None,
5268-
value=None,
5268+
value=lib.no_default,
52695269
inplace: bool = False,
52705270
limit=None,
52715271
regex: bool = False,
5272-
method: str = "pad",
5272+
method: str | lib.NoDefault = lib.no_default,
52735273
):
52745274
return super().replace(
52755275
to_replace=to_replace,

pandas/core/generic.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -6515,11 +6515,11 @@ def bfill(
65156515
def replace(
65166516
self,
65176517
to_replace=None,
6518-
value=None,
6518+
value=lib.no_default,
65196519
inplace: bool_t = False,
65206520
limit: int | None = None,
65216521
regex=False,
6522-
method="pad",
6522+
method=lib.no_default,
65236523
):
65246524
if not (
65256525
is_scalar(to_replace)
@@ -6538,7 +6538,15 @@ def replace(
65386538

65396539
self._consolidate_inplace()
65406540

6541-
if value is None:
6541+
if value is lib.no_default or method is not lib.no_default:
6542+
# GH#36984 if the user explicitly passes value=None we want to
6543+
# respect that. We have the corner case where the user explicitly
6544+
# passes value=None *and* a method, which we interpret as meaning
6545+
# they want the (documented) default behavior.
6546+
if method is lib.no_default:
6547+
# TODO: get this to show up as the default in the docs?
6548+
method = "pad"
6549+
65426550
# passing a single value that is scalar like
65436551
# when value is None (GH5319), for compat
65446552
if not is_dict_like(to_replace) and not is_dict_like(regex):

pandas/core/internals/blocks.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,12 @@ def replace(
679679
elif self._can_hold_element(value):
680680
blk = self if inplace else self.copy()
681681
putmask_inplace(blk.values, mask, value)
682-
blocks = blk.convert(numeric=False, copy=False)
682+
if not (self.is_object and value is None):
683+
# if the user *explicitly* gave None, we keep None, otherwise
684+
# may downcast to NaN
685+
blocks = blk.convert(numeric=False, copy=False)
686+
else:
687+
blocks = [blk]
683688
return blocks
684689

685690
elif self.ndim == 1 or self.shape[0] == 1:
@@ -802,7 +807,8 @@ def replace_list(
802807
inplace=inplace,
803808
regex=regex,
804809
)
805-
if convert and blk.is_object:
810+
if convert and blk.is_object and not all(x is None for x in dest_list):
811+
# GH#44498 avoid unwanted cast-back
806812
result = extend_blocks(
807813
[b.convert(numeric=False, copy=True) for b in result]
808814
)

pandas/core/series.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4924,11 +4924,11 @@ def pop(self, item: Hashable) -> Any:
49244924
def replace(
49254925
self,
49264926
to_replace=None,
4927-
value=None,
4927+
value=lib.no_default,
49284928
inplace=False,
49294929
limit=None,
49304930
regex=False,
4931-
method="pad",
4931+
method: str | lib.NoDefault = lib.no_default,
49324932
):
49334933
return super().replace(
49344934
to_replace=to_replace,

pandas/core/shared_docs.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -693,18 +693,30 @@
693693
4 None
694694
dtype: object
695695
696-
When ``value=None`` and `to_replace` is a scalar, list or
697-
tuple, `replace` uses the method parameter (default 'pad') to do the
696+
When ``value`` is not explicitly passed and `to_replace` is a scalar, list
697+
or tuple, `replace` uses the method parameter (default 'pad') to do the
698698
replacement. So this is why the 'a' values are being replaced by 10
699699
in rows 1 and 2 and 'b' in row 4 in this case.
700-
The command ``s.replace('a', None)`` is actually equivalent to
701-
``s.replace(to_replace='a', value=None, method='pad')``:
702700
703-
>>> s.replace('a', None)
701+
>>> s.replace('a')
704702
0 10
705703
1 10
706704
2 10
707705
3 b
708706
4 b
709707
dtype: object
708+
709+
On the other hand, if ``None`` is explicitly passed for ``value``, it will
710+
be respected:
711+
712+
>>> s.replace('a', None)
713+
0 10
714+
1 None
715+
2 None
716+
3 b
717+
4 None
718+
dtype: object
719+
720+
.. versionchanged:: 1.4.0
721+
Previously the explicit ``None`` was silently ignored.
710722
"""

pandas/tests/series/methods/test_replace.py

+39
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,45 @@
99

1010

1111
class TestSeriesReplace:
12+
def test_replace_explicit_none(self):
13+
# GH#36984 if the user explicitly passes value=None, give it to them
14+
ser = pd.Series([0, 0, ""], dtype=object)
15+
result = ser.replace("", None)
16+
expected = pd.Series([0, 0, None], dtype=object)
17+
tm.assert_series_equal(result, expected)
18+
19+
df = pd.DataFrame(np.zeros((3, 3)))
20+
df.iloc[2, 2] = ""
21+
result = df.replace("", None)
22+
expected = pd.DataFrame(
23+
{
24+
0: np.zeros(3),
25+
1: np.zeros(3),
26+
2: np.array([0.0, 0.0, None], dtype=object),
27+
}
28+
)
29+
assert expected.iloc[2, 2] is None
30+
tm.assert_frame_equal(result, expected)
31+
32+
# GH#19998 same thing with object dtype
33+
ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
34+
result = ser.replace("a", None)
35+
expected = pd.Series([10, 20, 30, None, None, "b", None])
36+
assert expected.iloc[-1] is None
37+
tm.assert_series_equal(result, expected)
38+
39+
def test_replace_noop_doesnt_downcast(self):
40+
# GH#44498
41+
ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
42+
res = ser.replace({np.nan: None}) # should be a no-op
43+
tm.assert_series_equal(res, ser)
44+
assert res.dtype == object
45+
46+
# same thing but different calling convention
47+
res = ser.replace(np.nan, None)
48+
tm.assert_series_equal(res, ser)
49+
assert res.dtype == object
50+
1251
def test_replace(self):
1352
N = 100
1453
ser = pd.Series(np.random.randn(N))

0 commit comments

Comments
 (0)