Skip to content

REF: simplify Block.replace #37781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,7 @@ Other

- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly casting from ``PeriodDtype`` to object dtype (:issue:`34871`)
- Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`)
- Bug in :meth:`Index.union` behaving differently depending on whether the operand is an :class:`Index` or another list-like (:issue:`36384`)
Expand Down
33 changes: 9 additions & 24 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,36 +752,21 @@ def replace(
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)

mask = missing.mask_missing(values, to_replace)
if not mask.any():
# Note: we get here with test_replace_extension_other incorrectly
# bc _can_hold_element is incorrect.
return [self] if inplace else [self.copy()]

try:
blocks = self.putmask(mask, value, inplace=inplace)
# Note: it is _not_ the case that self._can_hold_element(value)
# is always true at this point. In particular, that can fail
# for:
# "2u" with bool-dtype, float-dtype
# 0.5 with int64-dtype
# np.nan with int64-dtype
except (TypeError, ValueError):
# GH 22083, TypeError or ValueError occurred within error handling
# causes infinite loop. Cast and retry only if not objectblock.
if is_object_dtype(self):
raise

if not self.is_extension:
# TODO: https://github.com/pandas-dev/pandas/issues/32586
# Need an ExtensionArray._can_hold_element to indicate whether
# a scalar value can be placed in the array.
assert not self._can_hold_element(value), value

# try again with a compatible block
block = self.astype(object)
return block.replace(
if not self._can_hold_element(value):
blk = self.astype(object)
return blk.replace(
to_replace=original_to_replace,
value=value,
inplace=inplace,
inplace=True,
regex=regex,
)

blocks = self.putmask(mask, value, inplace=inplace)
blocks = extend_blocks(
[b.convert(numeric=False, copy=not inplace) for b in blocks]
)
Expand Down
16 changes: 8 additions & 8 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1523,18 +1523,18 @@ def test_replace_with_duplicate_columns(self, replacement):

tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(
reason="replace() changes dtype from period to object, see GH34871", strict=True
)
def test_replace_period_ignore_float(self):
def test_replace_period_ignore_float(self, frame_or_series):
"""
Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df
with a Period column the old, faulty behavior is to raise TypeError.
"""
df = DataFrame({"Per": [pd.Period("2020-01")] * 3})
result = df.replace(1.0, 0.0)
expected = DataFrame({"Per": [pd.Period("2020-01")] * 3})
tm.assert_frame_equal(expected, result)
obj = DataFrame({"Per": [pd.Period("2020-01")] * 3})
if frame_or_series is not DataFrame:
obj = obj["Per"]

expected = obj.copy()
result = obj.replace(1.0, 0.0)
tm.assert_equal(expected, result)

def test_replace_value_category_type(self):
"""
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,10 +424,12 @@ def test_replace_only_one_dictlike_arg(self):
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)

def test_replace_extension_other(self):
def test_replace_extension_other(self, frame_or_series):
# https://github.com/pandas-dev/pandas/issues/34530
ser = pd.Series(pd.array([1, 2, 3], dtype="Int64"))
ser.replace("", "") # no exception
obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
result = obj.replace("", "") # no exception
# should not have changed dtype
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On master, we get back int64 instead of Int64.

tm.assert_equal(obj, result)

def test_replace_with_compiled_regex(self):
# https://github.com/pandas-dev/pandas/issues/35680
Expand Down