Skip to content

BUG: pd.Series.replace does not preserve the original dtype #33622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
4 changes: 4 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
PandasDtype,
TimedeltaArray,
)
from pandas.core.arrays.string_ import StringDtype
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -1007,6 +1008,9 @@ def coerce_to_target_dtype(self, other):
# if we cannot then coerce to object
dtype, _ = infer_dtype_from(other, pandas_dtype=True)

if isinstance(self.dtype, StringDtype):
dtype = StringDtype()

if is_dtype_equal(self.dtype, dtype):
return self

Expand Down
19 changes: 18 additions & 1 deletion pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def test_replace2(self):
def test_replace_with_dictlike_and_string_dtype(self):
# GH 32621
s = pd.Series(["one", "two", np.nan], dtype="string")
expected = pd.Series(["1", "2", np.nan])
expected = pd.Series(["1", "2", np.nan], dtype="string")
result = s.replace({"one": "1", "two": "2"})
tm.assert_series_equal(expected, result)

Expand Down Expand Up @@ -402,3 +402,20 @@ def test_replace_only_one_dictlike_arg(self):
msg = "Series.replace cannot use dict-value and non-None to_replace"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)

@pytest.mark.parametrize(
"series, to_replace, expected",
[
(
pd.Series(["one", "two"], dtype="string"),
{"one": "1", "two": "2"},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add more tests that cover the other extension array (EA) types, e.g. Interval, Period, and Datetime with tz?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added test cases.

"string",
),
(pd.Series([1, 2], dtype="int64"), {1: 10, 2: 20}, "int64"),
(pd.Series([True, False], dtype="bool"), {True: False}, "bool"),
],
)
def test_replace_dtype(self, series, to_replace, expected):
# GH 33484
result = str(series.replace(to_replace).dtype)
assert expected == result