Skip to content

BUG: Series.replace does not preserve dtype of original Series #37512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,13 @@ def coerce_to_target_dtype(self, other):
if is_dtype_equal(self.dtype, dtype):
return self

if self._can_hold_element(other):
return self
elif self.is_categorical:
# Note: this will be wrong if we ever have a tuple category
cat = self.values.add_categories(other)
return self.make_block(cat)

if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
# we don't upcast to bool
return self.astype(object)
Expand Down Expand Up @@ -2592,6 +2599,26 @@ def replace(
result.values.replace(to_replace, value, inplace=True)
return [result]

def _can_hold_element(self, element: Any) -> bool:
    """
    Report whether ``element`` can be set into ``self.values`` as-is.

    The decision is delegated to ``self.values._validate_setitem_value``.
    A ``TypeError`` or ``ValueError`` raised by that call is interpreted as
    "cannot hold"; any other exception is left to propagate.

    Parameters
    ----------
    element : Any
        Candidate value to validate.

    Returns
    -------
    bool
        True when validation succeeds, False when it raises ``TypeError``
        or ``ValueError``.
    """
    try:
        self.values._validate_setitem_value(element)
    except (TypeError, ValueError):
        return False
    else:
        return True

def putmask(
    self, mask, new, inplace: bool = False, axis: int = 0, transpose: bool = False
) -> List["Block"]:
    """
    Apply ``new`` at the positions selected by ``mask``.

    If ``self._can_hold_element(new)`` is true, the call is forwarded
    unchanged to the parent class implementation. Otherwise ``new`` is
    passed to ``self.values.add_categories``, a block is built from the
    result with ``self.make_block``, and ``putmask`` is invoked on that
    block with the same arguments.

    Parameters
    ----------
    mask, new, inplace, axis, transpose
        Forwarded positionally, in this order, to whichever ``putmask``
        ends up handling the call.

    Returns
    -------
    List[Block]
        Whatever the delegated ``putmask`` call returns.
    """
    if not self._can_hold_element(new):
        # TODO: should this be inplace?
        # TODO: use coerce_to_target_dtype?
        widened_values = self.values.add_categories(new)
        widened_block = self.make_block(widened_values)
        # Sanity check: after adding the categories the value must fit.
        assert widened_block._can_hold_element(new)
        return widened_block.putmask(mask, new, inplace, axis, transpose)

    return super().putmask(mask, new, inplace, axis, transpose)


# -----------------------------------------------------------------
# Constructor Helpers
Expand Down
39 changes: 38 additions & 1 deletion pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray


class TestSeriesReplace:
Expand Down Expand Up @@ -259,7 +260,7 @@ def test_replace2(self):
def test_replace_with_dictlike_and_string_dtype(self):
# GH 32621
s = pd.Series(["one", "two", np.nan], dtype="string")
expected = pd.Series(["1", "2", np.nan])
expected = pd.Series(["1", "2", np.nan], dtype="string")
result = s.replace({"one": "1", "two": "2"})
tm.assert_series_equal(expected, result)

Expand Down Expand Up @@ -460,3 +461,39 @@ def test_str_replace_regex_default_raises_warning(self, pattern):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False) as w:
s.str.replace(pattern, "")
assert re.match(msg, str(w[0].message))

@pytest.mark.parametrize(
    "dtype, input_data, to_replace, expected_data",
    [
        # scalar-valued dtypes: each case maps existing values to new ones
        ("bool", [True, False], {True: False}, [False, False]),
        ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
        ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
        ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
        ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
        ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
        # intervals with integer bounds; only the first element is replaced
        (
            pd.IntervalDtype("int64"),
            IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
            {pd.Interval(1, 2): pd.Interval(10, 20)},
            IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
        ),
        # intervals with float bounds; only the first element is replaced
        (
            pd.IntervalDtype("float64"),
            IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
            {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
            IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
        ),
        # monthly periods
        (
            pd.PeriodDtype("M"),
            [pd.Period("2020-05", freq="M")],
            {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
            [pd.Period("2020-06", freq="M")],
        ),
    ],
)
def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
    """``Series.replace`` with a dict must return a Series of the input dtype."""
    # GH 33484
    original = pd.Series(input_data, dtype=dtype)
    expected = pd.Series(expected_data, dtype=dtype)
    result = original.replace(to_replace)
    tm.assert_series_equal(result, expected)