Skip to content

Commit df28239

Browse files
authored
BUG: Series.replace(method='pad') with EA dtypes (#44270)
1 parent 8d868db commit df28239

File tree

7 files changed

+93
-12
lines changed

7 files changed

+93
-12
lines changed

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,8 @@ Other
655655
- Bug in :meth:`RangeIndex.union` with another ``RangeIndex`` with matching (even) ``step`` and starts differing by strictly less than ``step / 2`` (:issue:`44019`)
656656
- Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`)
657657
- Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`)
658+
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`)
659+
-
658660

659661
.. ***DO NOT USE THIS SECTION***
660662

pandas/core/arrays/_mixins.py

+8
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,14 @@ def __getitem__(
246246
result = self._from_backing_data(result)
247247
return result
248248

249+
def _fill_mask_inplace(
250+
self, method: str, limit, mask: npt.NDArray[np.bool_]
251+
) -> None:
252+
# (for now) when self.ndim == 2, we assume axis=0
253+
func = missing.get_fill_func(method, ndim=self.ndim)
254+
func(self._ndarray.T, limit=limit, mask=mask.T)
255+
return
256+
249257
@doc(ExtensionArray.fillna)
250258
def fillna(
251259
self: NDArrayBackedExtensionArrayT, value=None, method=None, limit=None

pandas/core/arrays/base.py

+18
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,24 @@ def _where(
14341434
result[~mask] = val
14351435
return result
14361436

1437+
def _fill_mask_inplace(
1438+
self, method: str, limit, mask: npt.NDArray[np.bool_]
1439+
) -> None:
1440+
"""
1441+
Replace values in locations specified by 'mask' using pad or backfill.
1442+
1443+
See Also
1444+
--------
1445+
ExtensionArray.fillna
1446+
"""
1447+
func = missing.get_fill_func(method)
1448+
# NB: if we don't copy mask here, it may be altered inplace, which
1449+
# would mess up the `self[mask] = ...` below.
1450+
new_values, _ = func(self.astype(object), limit=limit, mask=mask.copy())
1451+
new_values = self._from_sequence(new_values, dtype=self.dtype)
1452+
self[mask] = new_values[mask]
1453+
return
1454+
14371455
@classmethod
14381456
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
14391457
"""

pandas/core/generic.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6518,10 +6518,13 @@ def replace(
65186518

65196519
if isinstance(to_replace, (tuple, list)):
65206520
if isinstance(self, ABCDataFrame):
6521-
return self.apply(
6521+
result = self.apply(
65226522
self._constructor_sliced._replace_single,
65236523
args=(to_replace, method, inplace, limit),
65246524
)
6525+
if inplace:
6526+
return
6527+
return result
65256528
self = cast("Series", self)
65266529
return self._replace_single(to_replace, method, inplace, limit)
65276530

pandas/core/missing.py

+3
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
9797
if na_mask.any():
9898
mask |= isna(arr)
9999

100+
if not isinstance(mask, np.ndarray):
101+
# e.g. if arr is IntegerArray, then mask is BooleanArray
102+
mask = mask.to_numpy(dtype=bool, na_value=False)
100103
return mask
101104

102105

pandas/core/series.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -4906,23 +4906,20 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit):
49064906
replacement value is given in the replace method
49074907
"""
49084908

4909-
orig_dtype = self.dtype
49104909
result = self if inplace else self.copy()
4911-
fill_f = missing.get_fill_func(method)
49124910

4913-
mask = missing.mask_missing(result.values, to_replace)
4914-
values, _ = fill_f(result.values, limit=limit, mask=mask)
4911+
values = result._values
4912+
mask = missing.mask_missing(values, to_replace)
49154913

4916-
if values.dtype == orig_dtype and inplace:
4917-
return
4918-
4919-
result = self._constructor(values, index=self.index, dtype=self.dtype)
4920-
result = result.__finalize__(self)
4914+
if isinstance(values, ExtensionArray):
4915+
# dispatch to the EA's _fill_mask_inplace method
4916+
values._fill_mask_inplace(method, limit, mask)
4917+
else:
4918+
fill_f = missing.get_fill_func(method)
4919+
values, _ = fill_f(values, limit=limit, mask=mask)
49214920

49224921
if inplace:
4923-
self._update_inplace(result)
49244922
return
4925-
49264923
return result
49274924

49284925
# error: Cannot determine type of 'shift'

pandas/tests/series/methods/test_replace.py

+50
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,56 @@ def test_replace_extension_other(self, frame_or_series):
442442
# should not have changed dtype
443443
tm.assert_equal(obj, result)
444444

445+
def _check_replace_with_method(self, ser: pd.Series):
446+
df = ser.to_frame()
447+
448+
res = ser.replace(ser[1], method="pad")
449+
expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
450+
tm.assert_series_equal(res, expected)
451+
452+
res_df = df.replace(ser[1], method="pad")
453+
tm.assert_frame_equal(res_df, expected.to_frame())
454+
455+
ser2 = ser.copy()
456+
res2 = ser2.replace(ser[1], method="pad", inplace=True)
457+
assert res2 is None
458+
tm.assert_series_equal(ser2, expected)
459+
460+
res_df2 = df.replace(ser[1], method="pad", inplace=True)
461+
assert res_df2 is None
462+
tm.assert_frame_equal(df, expected.to_frame())
463+
464+
def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
465+
arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
466+
ser = pd.Series(arr)
467+
468+
self._check_replace_with_method(ser)
469+
470+
@pytest.mark.parametrize("as_categorical", [True, False])
471+
def test_replace_interval_with_method(self, as_categorical):
472+
# in particular, an interval dtype that can't hold NA
473+
474+
idx = pd.IntervalIndex.from_breaks(range(4))
475+
ser = pd.Series(idx)
476+
if as_categorical:
477+
ser = ser.astype("category")
478+
479+
self._check_replace_with_method(ser)
480+
481+
@pytest.mark.parametrize("as_period", [True, False])
482+
@pytest.mark.parametrize("as_categorical", [True, False])
483+
def test_replace_datetimelike_with_method(self, as_period, as_categorical):
484+
idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
485+
if as_period:
486+
idx = idx.tz_localize(None).to_period("D")
487+
488+
ser = pd.Series(idx)
489+
ser.iloc[-2] = pd.NaT
490+
if as_categorical:
491+
ser = ser.astype("category")
492+
493+
self._check_replace_with_method(ser)
494+
445495
def test_replace_with_compiled_regex(self):
446496
# https://github.com/pandas-dev/pandas/issues/35680
447497
s = pd.Series(["a", "b", "c"])

0 commit comments

Comments
 (0)