Skip to content

BUG: pd.Series.replace does not preserve the original dtype #33622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
9 changes: 8 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,12 @@ def coerce_to_target_dtype(self, other):
if is_dtype_equal(self.dtype, dtype):
return self

if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
if is_extension_array_dtype(self.dtype) and not is_categorical_dtype(
self.dtype
):
dtype = self.dtype

elif self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
# we don't upcast to bool
return self.astype(object)

Expand Down Expand Up @@ -1053,6 +1058,8 @@ def coerce_to_target_dtype(self, other):
raise AssertionError(
f"possible recursion in coerce_to_target_dtype: {self} {other}"
)
if is_categorical_dtype(dtype) or self.is_datetime:
return self.astype(object)

try:
return self.astype(dtype)
Expand Down
52 changes: 51 additions & 1 deletion pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray


class TestSeriesReplace:
Expand Down Expand Up @@ -250,7 +251,7 @@ def test_replace2(self):
def test_replace_with_dictlike_and_string_dtype(self):
# GH 32621
s = pd.Series(["one", "two", np.nan], dtype="string")
expected = pd.Series(["1", "2", np.nan])
expected = pd.Series(["1", "2", np.nan], dtype="string")
result = s.replace({"one": "1", "two": "2"})
tm.assert_series_equal(expected, result)

Expand Down Expand Up @@ -402,3 +403,52 @@ def test_replace_only_one_dictlike_arg(self):
msg = "Series.replace cannot use dict-value and non-None to_replace"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)

@pytest.mark.parametrize(
"series, to_replace, expected",
[
(
pd.Series(["one", "two"], dtype="string"),
{"one": "1", "two": "2"},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a bunch more tests that hit the other EA (extension array) types, e.g. Interval, Period, and Datetime w/ tz?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added test cases.

"string",
),
(pd.Series([1, 2], dtype="int64"), {1: 10, 2: 20}, "int64"),
(pd.Series([True, False], dtype="bool"), {True: False}, "bool"),
(
pd.Series(IntervalArray.from_breaks([1, 2, 3, 4]), dtype=pd.IntervalDtype("int64")),
{pd.Interval(1, 2): pd.Interval(10, 20)},
"interval[int64]",
),
(
pd.Series(IntervalArray.from_breaks([1, 2, 3, 4]), dtype=pd.IntervalDtype("float64")),
{pd.Interval(1, 2): pd.Interval(0.2, 0.3)},
"interval[float64]",
),
(
pd.Series([pd.Period("2020-05", freq="M")], dtype=pd.PeriodDtype("M")),
{pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
"period[M]",
),
(
pd.Series(
pd.arrays.DatetimeArray(
np.array(
["2000-01-01T12:00:00", "2000-01-02T12:00:00"],
dtype="M8[ns]",
),
dtype=pd.DatetimeTZDtype(tz="US/Central"),
)
),
{
pd.Timestamp(
"2000-01-01 06:00:00-0600", tz="US/Central"
): pd.Timestamp("2000-01-01 12:00:00-0600", tz="US/Central")
},
"datetime64[ns, US/Central]",
),
],
)
def test_replace_dtype(self, series, to_replace, expected):
# GH 33484
result = series.replace(to_replace).dtype
assert expected == result